From 5bffed562bc857154c8fab13cfc3f494080f7fdd Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 19 Aug 2021 11:08:25 -0400 Subject: [PATCH 1/5] Reduce memory & redundant work for concurrent TimeZones construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit improves the thread-local caching scheme introduced in https://github.com/JuliaTime/TimeZones.jl/pull/344, by sharing TimeZones across _all_ thread-local caches (so there's only ever one TimeZone object instance created per process), and reduces redundant work caused by multiple concurrent Tasks starting to construct the same TimeZone at the same time. Before this commit, multiple Tasks that try to construct the same TimeZone on different threads would have constructed the same object multiple times. Whereas now, they would share it, via the "Future". ------------------------------------------------------------ It's difficult to show this effect, but one way to show it is by logging when a TimeZone is constructed, via this diff: ```julia diff --git a/src/types/timezone.jl b/src/types/timezone.jl index 25d36c3..1cea69e 100644 --- a/src/types/timezone.jl +++ b/src/types/timezone.jl @@ -68,6 +68,7 @@ function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT)) # Note: If the class `mask` does not match the time zone we'll still load the # information into the cache to ensure the result is consistent. tz, class = get!(_tz_cache(), str) do + Core.println("CONSTRUCTING $str") tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...) if isfile(tz_path) ``` Before this commit, you can see that every thread constructs the object twice - once before the clear and once after (total of 8 times): ```julia julia> Threads.nthreads() 4 julia> TimeZones.TZData.compile(); julia> foo() = begin @sync for i in 1:20 if (i == 10) @info "---------clear-------" TimeZones.TZData.compile() end Threads.@spawn begin TimeZone("US/Eastern", TimeZones.Class(:LEGACY)) end end @info "done" end foo (generic function with 1 method) julia> @time foo() [ Info: ---------clear------- CONSTRUCTING US/Eastern CONSTRUCTING US/Eastern CONSTRUCTING US/Eastern CONSTRUCTING US/Eastern CONSTRUCTING US/Eastern CONSTRUCTING US/Eastern CONSTRUCTING US/Eastern CONSTRUCTING US/Eastern [ Info: done 0.391298 seconds (1.51 M allocations: 64.544 MiB, 2.46% gc time, 0.00% compilation time) ``` After this commit, it's constructed only twice - once before the clear and once after: ```julia julia> @time foo() [ Info: ---------clear------- [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: done 0.414059 seconds (1.46 M allocations: 61.972 MiB, 4.55% gc time) ``` ------------------------------------------------------------------ Finally, the other problem this avoids is if we ever accidentally introduce a Task yield inside the constructor, which could happen if we used `@info` instead of `Core.println()`, then without this PR, the old code could potentially do _redundant work_ to construct the TimeZone multiple times - even on the same thread - since each Task's constructor would see that there's no TZ in the cache, start the work, and then yield to the next Task, which would do the same, until finally they all report their work into the cache, overwriting each other. This is what happens if we use `@info` in the above diff, instead: Before this commit - the results are nondeterministic, but in this run, you can see it redundantly constructed the value all 20 times!: ```julia julia> @time foo() [ Info: ---------clear------- [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: done 0.494492 seconds (1.55 M allocations: 66.754 MiB, 16.67% gc time) ``` After this commit, just the two we expect. 😊 : ```julia julia> @time foo() [ Info: ---------clear------- [ Info: CONSTRUCTING US/Eastern [ Info: CONSTRUCTING US/Eastern [ Info: done 0.422677 seconds (1.47 M allocations: 62.228 MiB, 4.66% gc time) ``` --- src/TimeZones.jl | 3 +- src/types/timezone.jl | 68 +++++++++++++++++++++++++++++++++---------- src/tzdata/TZData.jl | 2 +- src/tzdata/compile.jl | 9 ++---- 4 files changed, 58 insertions(+), 24 deletions(-) diff --git a/src/TimeZones.jl b/src/TimeZones.jl index b69e0becb..5c68e99aa 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -1,5 +1,6 @@ module TimeZones +using Base: @lock using Dates using Printf using Serialization @@ -39,7 +40,7 @@ abstract type Local <: TimeZone end function __init__() # Initialize the thread-local TimeZone cache (issue #342) - _reset_tz_cache() + _tz_cache_init() # Base extension needs to happen everytime the module is loaded (issue #24) Dates.CONVERSION_SPECIFIERS['z'] = TimeZone diff --git a/src/types/timezone.jl b/src/types/timezone.jl index 7d198b936..07e4ed3d6 100644 --- a/src/types/timezone.jl +++ b/src/types/timezone.jl @@ -4,6 +4,12 @@ # to the cache, while still being thread-safe. const THREAD_TZ_CACHES = Vector{Dict{String,Tuple{TimeZone,Class}}}() +# Holding a lock during construction of a specific TimeZone prevents multiple Tasks (on the +# same or different threads) from attempting to construct the same TimeZone object, and +# allows them all to share the result. +const tz_cache_mutex = ReentrantLock() +const TZ_CACHE_FUTURES = Dict{String,Channel{Tuple{TimeZone,Class}}}() # Guarded by: tz_cache_mutex + # Based upon the thread-safe Global RNG implementation in the Random stdlib: # https://github.com/JuliaLang/julia/blob/e4fcdf5b04fd9751ce48b0afc700330475b42443/stdlib/Random/src/RNGs.jl#L369-L385 @inline _tz_cache() = _tz_cache(Threads.threadid()) @@ -19,10 +25,22 @@ const THREAD_TZ_CACHES = Vector{Dict{String,Tuple{TimeZone,Class}}}() end @noinline _tz_cache_length_assert() = @assert false "0 < tid <= length(THREAD_TZ_CACHES)" -function _reset_tz_cache() - # ensures that we didn't save a bad object +function _tz_cache_init() resize!(empty!(THREAD_TZ_CACHES), Threads.nthreads()) end +# ensures that we didn't save a bad object +function _reset_tz_cache() + # Since we use thread-local caches, we spawn a task on _each thread_ to clear that + # thread's local cache. + Threads.@threads for i in 1:Threads.nthreads() + @assert Threads.threadid() === i "TimeZones.TZData.compile() must be called from the main, top-level Task." + empty!(_tz_cache()) + end + @lock tz_cache_mutex begin + empty!(TZ_CACHE_FUTURES) + end + return nothing +end """ TimeZone(str::AbstractString) -> TimeZone @@ -68,20 +86,40 @@ function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT)) # Note: If the class `mask` does not match the time zone we'll still load the # information into the cache to ensure the result is consistent. tz, class = get!(_tz_cache(), str) do - tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...) - - if isfile(tz_path) - open(deserialize, tz_path, "r") - elseif occursin(FIXED_TIME_ZONE_REGEX, str) - FixedTimeZone(str), Class(:FIXED) - elseif !isdir(TZData.COMPILED_DIR) || isempty(readdir(TZData.COMPILED_DIR)) - # Note: Julia 1.0 supresses the build logs which can hide issues in time zone - # compliation which result in no tzdata time zones being available. - throw(ArgumentError( - "Unable to find time zone \"$str\". Try running `TimeZones.build()`." - )) + # Even though we're using Thread-local caches, we still need to lock during + # construction to prevent multiple tasks redundantly constructing the same object, + # and potential thread safety violations due to Tasks migrating threads. + # NOTE that we only grab the lock if the TZ doesn't exist, so the mutex contention + # is not on the critical path for most constructors. :) + constructing = false + # We lock the mutex, but for only a short, *constant time* duration, to grab the + # future for this TimeZone, or create the future if it doesn't exist. + future = @lock tz_cache_mutex begin + get!(TZ_CACHE_FUTURES, str) do + constructing = true + Channel{Tuple{TimeZone,Class}}(1) + end + end + if constructing + tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...) + + tz = if isfile(tz_path) + open(deserialize, tz_path, "r") + elseif occursin(FIXED_TIME_ZONE_REGEX, str) + FixedTimeZone(str), Class(:FIXED) + elseif !isdir(TZData.COMPILED_DIR) || isempty(readdir(TZData.COMPILED_DIR)) + # Note: Julia 1.0 supresses the build logs which can hide issues in time zone + # compliation which result in no tzdata time zones being available. + throw(ArgumentError( + "Unable to find time zone \"$str\". Try running `TimeZones.build()`." + )) + else + throw(ArgumentError("Unknown time zone \"$str\"")) + end + + put!(future, tz) else - throw(ArgumentError("Unknown time zone \"$str\"")) + fetch(future) end end diff --git a/src/tzdata/TZData.jl b/src/tzdata/TZData.jl index 54843de0b..e3a093d98 100644 --- a/src/tzdata/TZData.jl +++ b/src/tzdata/TZData.jl @@ -1,7 +1,7 @@ module TZData using Printf -using ...TimeZones: DEPS_DIR +using ...TimeZones: DEPS_DIR, _reset_tz_cache import Pkg diff --git a/src/tzdata/compile.jl b/src/tzdata/compile.jl index aa89401a5..a66dc7a38 100644 --- a/src/tzdata/compile.jl +++ b/src/tzdata/compile.jl @@ -2,7 +2,7 @@ using Dates using Serialization using Dates: parse_components -using ...TimeZones: _tz_cache +using ...TimeZones: _reset_tz_cache using ...TimeZones: TimeZones, TimeZone, FixedTimeZone, VariableTimeZone, Transition, Class using ...TimeZones: rename using ..TZData: TimeOffset, ZERO, MIN_GMT_OFFSET, MAX_GMT_OFFSET, MIN_SAVE, MAX_SAVE, @@ -696,12 +696,7 @@ function compile(tz_source::TZSource, dest_dir::AbstractString; kwargs...) isdir(dest_dir) || error("Destination directory doesn't exist") # When we recompile the TimeZones from a new source, we clear all the existing cached # TimeZone objects, so that newly constructed objects pick up the newly compiled rules. - # Since we use thread-local caches, we spawn a task on _each thread_ to clear that - # thread's local cache. - Threads.@threads for i in 1:Threads.nthreads() - @assert Threads.threadid() === i "TimeZones.TZData.compile() must be called from the main, top-level Task." - empty!(_tz_cache()) - end + _reset_tz_cache() for (tz, class) in results parts = split(TimeZones.name(tz), '/') From 6c0d3ead7640f7818b7cbea9dd3596423f73c225 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 20 Aug 2021 13:41:49 -0400 Subject: [PATCH 2/5] Add `@lock` macro on julia 1.0 I'm not really sure if this is necessary because probably no one is using threading on 1.0 anyway, but i guess they could be doing something with `@threads`, so better safe than sorry. I didn't find `@lock` in Compat.jl, so this seemed like the easiest approach? --- src/TimeZones.jl | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/TimeZones.jl b/src/TimeZones.jl index 5c68e99aa..1120a8918 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -1,12 +1,27 @@ module TimeZones -using Base: @lock using Dates using Printf using Serialization using RecipesBase: RecipesBase, @recipe using Unicode +if VERSION >= v"1.3-" + using Base: @lock +else + macro lock(l, e) + quote + ll = $(esc(l)); + $lock(ll); + try + $(esc(e)) + finally + $unlock(ll) + end + end + end +end + import Dates: TimeZone, UTC export TimeZone, @tz_str, istimezone, FixedTimeZone, VariableTimeZone, ZonedDateTime, From beacaaac0ca028eaf664c5df969fde350de7d7cf Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 2 Jan 2022 11:01:52 -0500 Subject: [PATCH 3/5] PR Feedback from review. - Clean up variable namings - Add TODO reference to Compat.jl for `Base.@lock` --- src/TimeZones.jl | 1 + src/types/timezone.jl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/TimeZones.jl b/src/TimeZones.jl index 1120a8918..40825002e 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -6,6 +6,7 @@ using Serialization using RecipesBase: RecipesBase, @recipe using Unicode +# TODO: Use Compat.@lock instead after https://github.com/JuliaLang/Compat.jl/issues/762. if VERSION >= v"1.3-" using Base: @lock else diff --git a/src/types/timezone.jl b/src/types/timezone.jl index 07e4ed3d6..d4005e29c 100644 --- a/src/types/timezone.jl +++ b/src/types/timezone.jl @@ -103,7 +103,7 @@ function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT)) if constructing tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...) - tz = if isfile(tz_path) + t = if isfile(tz_path) open(deserialize, tz_path, "r") elseif occursin(FIXED_TIME_ZONE_REGEX, str) FixedTimeZone(str), Class(:FIXED) @@ -117,7 +117,7 @@ function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT)) throw(ArgumentError("Unknown time zone \"$str\"")) end - put!(future, tz) + put!(future, t) else fetch(future) end From 4f444eb033677f7044fd5b3378c64da2fc8dbeb7 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 2 Jan 2022 11:05:18 -0500 Subject: [PATCH 4/5] Drop compat for `Base.@lock` now that we're on julia 1.6+ as LTS :) --- src/TimeZones.jl | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/TimeZones.jl b/src/TimeZones.jl index 2f50ee340..47e0cfaba 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -7,22 +7,7 @@ using RecipesBase: RecipesBase, @recipe using Unicode using InlineStrings: InlineString15 -# TODO: Use Compat.@lock instead after https://github.com/JuliaLang/Compat.jl/issues/762. -if VERSION >= v"1.3-" - using Base: @lock -else - macro lock(l, e) - quote - ll = $(esc(l)); - $lock(ll); - try - $(esc(e)) - finally - $unlock(ll) - end - end - end -end +using Base: @lock import Dates: TimeZone, UTC From 0258929bf29aa709442304dee2dadb5312288e75 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 13 Feb 2022 13:10:56 -0500 Subject: [PATCH 5/5] PR feedback --- src/TimeZones.jl | 2 +- src/types/timezone.jl | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/TimeZones.jl b/src/TimeZones.jl index 47e0cfaba..452c643d2 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -42,7 +42,7 @@ abstract type Local <: TimeZone end function __init__() # Initialize the thread-local TimeZone cache (issue #342) - _tz_cache_init() + _init_tz_cache() # Base extension needs to happen everytime the module is loaded (issue #24) Dates.CONVERSION_SPECIFIERS['z'] = TimeZone diff --git a/src/types/timezone.jl b/src/types/timezone.jl index f4102d07b..f74099e65 100644 --- a/src/types/timezone.jl +++ b/src/types/timezone.jl @@ -7,8 +7,8 @@ const THREAD_TZ_CACHES = Vector{Dict{String,Tuple{TimeZone,Class}}}() # Holding a lock during construction of a specific TimeZone prevents multiple Tasks (on the # same or different threads) from attempting to construct the same TimeZone object, and # allows them all to share the result. -const tz_cache_mutex = ReentrantLock() -const TZ_CACHE_FUTURES = Dict{String,Channel{Tuple{TimeZone,Class}}}() # Guarded by: tz_cache_mutex +const TZ_CACHE_MUTEX = ReentrantLock() +const TZ_CACHE_FUTURES = Dict{String,Channel{Tuple{TimeZone,Class}}}() # Guarded by: TZ_CACHE_MUTEX # Based upon the thread-safe Global RNG implementation in the Random stdlib: # https://github.com/JuliaLang/julia/blob/e4fcdf5b04fd9751ce48b0afc700330475b42443/stdlib/Random/src/RNGs.jl#L369-L385 @@ -25,7 +25,7 @@ const TZ_CACHE_FUTURES = Dict{String,Channel{Tuple{TimeZone,Class}}}() # Guarde end @noinline _tz_cache_length_assert() = @assert false "0 < tid <= length(THREAD_TZ_CACHES)" -function _tz_cache_init() +function _init_tz_cache() resize!(empty!(THREAD_TZ_CACHES), Threads.nthreads()) end # ensures that we didn't save a bad object @@ -36,7 +36,7 @@ function _reset_tz_cache() @assert Threads.threadid() === i "TimeZones.TZData.compile() must be called from the main, top-level Task." empty!(_tz_cache()) end - @lock tz_cache_mutex begin + @lock TZ_CACHE_MUTEX begin empty!(TZ_CACHE_FUTURES) end return nothing @@ -94,7 +94,7 @@ function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT)) constructing = false # We lock the mutex, but for only a short, *constant time* duration, to grab the # future for this TimeZone, or create the future if it doesn't exist. - future = @lock tz_cache_mutex begin + future = @lock TZ_CACHE_MUTEX begin get!(TZ_CACHE_FUTURES, str) do constructing = true Channel{Tuple{TimeZone,Class}}(1)