Skip to content

Commit

Permalink
Thread-local arenas
Browse files Browse the repository at this point in the history
Currently, all threads use the same arena for imaging. This can
result in a lot of contention when there are enough workers and
the mutex is constantly being checked.

This commit instead introduces lockless thread-local arenas for
environments that support it.
  • Loading branch information
Kevin Newton committed Jan 13, 2025
1 parent 128f3f4 commit 350283e
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 37 deletions.
67 changes: 60 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
# ------------------------------
from __future__ import annotations

import distutils.ccompiler
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import warnings
from collections.abc import Iterator
from typing import Any
Expand Down Expand Up @@ -292,6 +294,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None:
return None


def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool:
try:
with tempfile.TemporaryDirectory() as d:
fn = os.path.join(d, "test.c")
with open(fn, "w") as f:
f.write(code)
compiler.compile([fn], output_dir=d, extra_preargs=["-Werror"])
return True
except distutils.ccompiler.CompileError:
return False


def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool:
code = f"""
#pragma GCC diagnostic error "-Wattributes"
#pragma clang diagnostic error "-Wattributes"
int {attr} foo;
int main() {{
return 0;
}}
"""

return _try_compile(compiler, code)


def _try_compile_tls_define_macro(
compiler: distutils.ccompiler.CCompiler,
) -> tuple[str, str | None] | None:
if _try_compile_attr(compiler, "thread_local"): # C23
return ("HAVE_THREAD_LOCAL", None)
elif _try_compile_attr(compiler, "_Thread_local"): # C11/C17
return ("HAVE__THREAD_LOCAL", None)
elif _try_compile_attr(compiler, "__thread"): # GCC/clang
return ("HAVE___THREAD", None)
elif _try_compile_attr(compiler, "__declspec(thread)"): # MSVC
return ("HAVE___DECLSPEC_THREAD_", None)
else:
return None


class pil_build_ext(build_ext):
class ext_feature:
features = [
Expand Down Expand Up @@ -426,13 +469,14 @@ def finalize_options(self) -> None:
def _update_extension(
self,
name: str,
libraries: list[str] | list[str | bool | None],
libraries: list[str] | list[str | bool | None] | None = None,
define_macros: list[tuple[str, str | None]] | None = None,
sources: list[str] | None = None,
) -> None:
for extension in self.extensions:
if extension.name == name:
extension.libraries += libraries
if libraries is not None:
extension.libraries += libraries
if define_macros is not None:
extension.define_macros += define_macros
if sources is not None:
Expand Down Expand Up @@ -888,17 +932,26 @@ def build_extensions(self) -> None:
if struct.unpack("h", b"\0\1")[0] == 1:
defs.append(("WORDS_BIGENDIAN", None))

tls_define_macro = _try_compile_tls_define_macro(self.compiler)
tls_define_macros = [] if tls_define_macro is None else [tls_define_macro]

defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"'))

self._update_extension("PIL._imaging", libs, defs)
self._update_extension("PIL._imaging", libs, defs + tls_define_macros)
self._update_extension(
"PIL._imagingmath", define_macros=(defs + tls_define_macros)
)
self._update_extension(
"PIL._imagingmorph", define_macros=(defs + tls_define_macros)
)

#
# additional libraries

if feature.get("freetype"):
srcs = []
libs = ["freetype"]
defs = []
defs = [] + tls_define_macros
if feature.get("raqm"):
if not feature.want_vendor("raqm"): # using system Raqm
defs.append(("HAVE_RAQM", None))
Expand All @@ -922,19 +975,19 @@ def build_extensions(self) -> None:
libs = [feature.get("lcms")]
if sys.platform == "win32":
libs.extend(["user32", "gdi32"])
self._update_extension("PIL._imagingcms", libs)
self._update_extension("PIL._imagingcms", libs, tls_define_macros[:])
else:
self._remove_extension("PIL._imagingcms")

webp = feature.get("webp")
if isinstance(webp, str):
libs = [webp, webp + "mux", webp + "demux"]
self._update_extension("PIL._webp", libs)
self._update_extension("PIL._webp", libs, tls_define_macros[:])
else:
self._remove_extension("PIL._webp")

tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else []
self._update_extension("PIL._imagingtk", tk_libs)
self._update_extension("PIL._imagingtk", tk_libs, tls_define_macros[:])

build_ext.build_extensions(self)

Expand Down
36 changes: 18 additions & 18 deletions src/_imaging.c
Original file line number Diff line number Diff line change
Expand Up @@ -3931,7 +3931,7 @@ _get_stats(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingMemoryArena arena = &ImagingDefaultArena;

v = PyLong_FromLong(arena->stats_new_count);
Expand All @@ -3958,7 +3958,7 @@ _get_stats(PyObject *self, PyObject *args) {
PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None);
Py_XDECREF(v);

MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return d;
}

Expand All @@ -3968,14 +3968,14 @@ _reset_stats(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingMemoryArena arena = &ImagingDefaultArena;
arena->stats_new_count = 0;
arena->stats_allocated_blocks = 0;
arena->stats_reused_blocks = 0;
arena->stats_reallocated_blocks = 0;
arena->stats_freed_blocks = 0;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);

Py_INCREF(Py_None);
return Py_None;
Expand All @@ -3987,9 +3987,9 @@ _get_alignment(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int alignment = ImagingDefaultArena.alignment;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return PyLong_FromLong(alignment);
}

Expand All @@ -3999,9 +3999,9 @@ _get_block_size(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int block_size = ImagingDefaultArena.block_size;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return PyLong_FromLong(block_size);
}

Expand All @@ -4011,9 +4011,9 @@ _get_blocks_max(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int blocks_max = ImagingDefaultArena.blocks_max;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return PyLong_FromLong(blocks_max);
}

Expand All @@ -4034,9 +4034,9 @@ _set_alignment(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingDefaultArena.alignment = alignment;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);

Py_INCREF(Py_None);
return Py_None;
Expand All @@ -4059,9 +4059,9 @@ _set_block_size(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingDefaultArena.block_size = block_size;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);

Py_INCREF(Py_None);
return Py_None;
Expand All @@ -4085,9 +4085,9 @@ _set_blocks_max(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
if (!status) {
return ImagingError_MemoryError();
}
Expand All @@ -4104,9 +4104,9 @@ _clear_cache(PyObject *self, PyObject *args) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingMemoryClearCache(&ImagingDefaultArena, i);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);

Py_INCREF(Py_None);
return Py_None;
Expand Down
31 changes: 29 additions & 2 deletions src/libImaging/Imaging.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,32 @@ struct ImagingPaletteInstance {
int keep_cache; /* This palette will be reused; keep cache */
};

#define IMAGING_ARENA_LOCK(m)
#define IMAGING_ARENA_UNLOCK(m)

#if defined(__cplusplus)
#define IMAGING_ARENA_TLS thread_local
#elif defined(HAVE_THREAD_LOCAL)
#define IMAGING_ARENA_TLS thread_local
#elif defined(HAVE__THREAD_LOCAL)
#define IMAGING_ARENA_TLS _Thread_local
#elif defined(HAVE___THREAD)
#define IMAGING_ARENA_TLS __thread
#elif defined(HAVE___DECLSPEC_THREAD_)
#define IMAGING_ARENA_TLS __declspec(thread)
#elif defined(Py_GIL_DISABLED)
#define IMAGING_ARENA_TLS
#define IMAGING_ARENA_LOCKING

#undef IMAGING_ARENA_LOCK
#undef IMAGING_ARENA_UNLOCK

#define IMAGING_ARENA_LOCK(m) PyMutex_Lock(m)
#define IMAGING_ARENA_UNLOCK(m) PyMutex_Unlock(m)
#else
#define IMAGING_ARENA_TLS
#endif

typedef struct ImagingMemoryArena {
int alignment; /* Alignment in memory of each line of an image */
int block_size; /* Preferred block size, bytes */
Expand All @@ -161,15 +187,16 @@ typedef struct ImagingMemoryArena {
int stats_reallocated_blocks; /* Number of blocks which were actually reallocated
after retrieving */
int stats_freed_blocks; /* Number of freed blocks */
#ifdef Py_GIL_DISABLED

#ifdef IMAGING_ARENA_LOCKING
PyMutex mutex;
#endif
} *ImagingMemoryArena;

/* Objects */
/* ------- */

extern struct ImagingMemoryArena ImagingDefaultArena;
extern IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena;
extern int
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max);
extern void
Expand Down
20 changes: 10 additions & 10 deletions src/libImaging/Storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,9 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
break;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingDefaultArena.stats_new_count += 1;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);

return im;
}
Expand Down Expand Up @@ -259,7 +259,7 @@ ImagingDelete(Imaging im) {

#define IMAGING_PAGE_SIZE (4096)

struct ImagingMemoryArena ImagingDefaultArena = {
IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena = {
1, // alignment
16 * 1024 * 1024, // block_size
0, // blocks_max
Expand All @@ -270,7 +270,7 @@ struct ImagingMemoryArena ImagingDefaultArena = {
0,
0,
0, // Stats
#ifdef Py_GIL_DISABLED
#ifdef IMAGING_ARENA_LOCKING
{0},
#endif
};
Expand Down Expand Up @@ -369,12 +369,12 @@ ImagingDestroyArray(Imaging im) {
int y = 0;

if (im->blocks) {
MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
while (im->blocks[y].ptr) {
memory_return_block(&ImagingDefaultArena, im->blocks[y]);
y += 1;
}
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
free(im->blocks);
}
}
Expand Down Expand Up @@ -504,21 +504,21 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
return NULL;
}

MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
Imaging tmp = ImagingAllocateArray(
im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size
);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
if (tmp) {
return im;
}

ImagingError_Clear();

// Try to allocate the image once more with smallest possible block size
MUTEX_LOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
if (tmp) {
return im;
}
Expand Down

0 comments on commit 350283e

Please sign in to comment.