From 7c5372ee90b6c2f27353618d26c92dcb909e5afc Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Tue, 25 Jun 2024 11:03:39 -0300 Subject: [PATCH 01/10] low_memory option initial implementation --- src/ComplexMixtures.jl | 2 +- src/mddf.jl | 274 ++++++++++-------- src/{updatecounters.jl => update_counters.jl} | 6 +- 3 files changed, 158 insertions(+), 124 deletions(-) rename src/{updatecounters.jl => update_counters.jl} (91%) diff --git a/src/ComplexMixtures.jl b/src/ComplexMixtures.jl index 58287d8b4..a97d025d3 100644 --- a/src/ComplexMixtures.jl +++ b/src/ComplexMixtures.jl @@ -67,7 +67,7 @@ include("./compare.jl") include("./minimum_distances.jl") # Update counters routines -include("./updatecounters.jl") +include("./update_counters.jl") # Main function include("./mddf.jl") diff --git a/src/mddf.jl b/src/mddf.jl index 3bab1931f..632b5a656 100644 --- a/src/mddf.jl +++ b/src/mddf.jl @@ -49,6 +49,27 @@ end end end +# +# Structure to carry the results of the computation +# in threaded chunks: carries the information if the +# results must be updated atomically or not. The locked +# update is used in the `low_memory` mode. +# +struct ResultChunk{L<:Union{ReentrantLock,Nothing}} + R::Result + lock::L +end +# unlocked updates +update_volume!(r_chunk::ResultChunk{Nothing}, system, frame_weight) = + r_chunk.R.volume.total += frame_weight * cell_volume(system) +update_counters!(r_chunk::ResultChunk{Nothing}, args...) = + update_counters!(r_chunk.R, args...) +# locked updates +update_volume!(r_chunk::ResultChunk{ReentrantLock}, system, frame_weight) = + @lock r_chunk.lock r_chunk.R.volume.total += frame_weight * cell_volume(system) +update_counters!(r_chunk::ResultChunk{ReentrantLock}, args...) = + @lock r_chunk.lock update_counters!(r_chunk.R, args...) + # # Defines is a molecule is a bulk molecule # @@ -98,7 +119,7 @@ function goto_nextframe!(iframe, R, trajectory, to_compute_frames, options) if iframe in to_compute_frames compute = true end - # Run GC if memory is getting full: this are issues with Chemfiles reading scheme + # Run GC if memory is getting full: this is related to issues with Chemfiles reading scheme if options.GC && (Sys.free_memory() / Sys.total_memory() < options.GC_threshold) GC.gc() end @@ -159,6 +180,7 @@ function mddf( frame_weights=Float64[], coordination_number_only=false, mem_warn=true, + low_memory=false, ) options.silent || println(bars) @@ -238,11 +260,15 @@ function mddf( sum_results_lock = ReentrantLock() Threads.@threads for frame_range in ChunkSplitters.chunks(to_read_frames; n=nchunks) # Local data structures for this chunk - R_chunk = Result(trajectory, options; trajectory_data, frame_weights) system_chunk = ParticleSystem(trajectory, trajectory_data.unitcell, options) buff_chunk = Buffer(trajectory, R) + if low_memory + R_chunk = ResultChunk(R, sum_results_lock) # has to be updated with locks: slow but memory efficient + else + R_chunk = ResultChunk(Result(trajectory, options; trajectory_data, frame_weights), nothing) + end # Reset the number of frames read by each chunk - R_chunk.files[1].nframes_read = 0 + nframes_read = 0 for _ in frame_range local compute, frame_weight # Read frame coordinates @@ -258,7 +284,7 @@ function mddf( unitcell = convert_unitcell(trajectory_data.unitcell, getunitcell(trajectory)) update_unitcell!(system_chunk, unitcell) # Read weight of this frame. 
- frame_weight = R_chunk.files[1].frame_weights[iframe] + frame_weight = R_chunk.R.files[1].frame_weights[iframe] # Display progress bar options.silent || next!(progress) end @@ -267,7 +293,7 @@ function mddf( # Perform MDDF computation # if compute - R_chunk.files[1].nframes_read += 1 + nframes_read += 1 # Compute distances in this frame and update results if !coordination_number_only mddf_frame!(R_chunk, system_chunk, buff_chunk, options, frame_weight, RNG) @@ -278,8 +304,11 @@ function mddf( end # frame range for this chunk # Sum the results of this chunk into the total results data structure - @lock sum_results_lock begin - sum!(R, R_chunk) + if !low_memory + R_chunk.R.files[1].nframes_read = nframes_read + @lock sum_results_lock sum!(R, R_chunk.R) + else + @lock sum_results_lock R.files[1].nframes_read += nframes_read end end @@ -298,13 +327,14 @@ cell_volume(system::AbstractParticleSystem) = @views dot(cross(system.unitcell[:, 1], system.unitcell[:, 2]), system.unitcell[:, 3]) #= - mddf_frame!(R::Result, system::AbstractParticleSystem, buff::Buffer, options::Options, frame_weight, RNG) + mddf_frame!(r_chunk::ResultChunk, system::AbstractParticleSystem, buff::Buffer, options::Options, frame_weight, RNG) -Computes the MDDF for a single frame. Modifies the data in the `R` (type `Result`) structure. +Computes the MDDF for a single frame. Modifies the data in the `r_chunk` (type `ResultChunk`) structure, through +the `update_volume!` and `update_counters!` functions. =# function mddf_frame!( - R::Result, + r_chunk::ResultChunk, system::AbstractParticleSystem, buff::Buffer, options::Options, @@ -313,11 +343,11 @@ function mddf_frame!( ) # Sum up the volume of this frame - R.volume.total += frame_weight * cell_volume(system) + update_volume!(r_chunk, system, frame_weight) # Random set of solute molecules to use as reference for the ideal gas distributions for i in eachindex(buff.ref_solutes) - buff.ref_solutes[i] = rand(RNG, 1:R.solute.nmols) + buff.ref_solutes[i] = rand(RNG, 1:r_chunk.R.solute.nmols) end # @@ -325,22 +355,22 @@ function mddf_frame!( # update_lists = true system.ypositions .= buff.solvent_read - for isolute = 1:R.solute.nmols + for isolute = 1:r_chunk.R.solute.nmols # We need to do this one solute molecule at a time to avoid exploding the memory requirements - system.xpositions .= viewmol(isolute, buff.solute_read, R.solute) + system.xpositions .= viewmol(isolute, buff.solute_read, r_chunk.R.solute) # Compute minimum distances of the molecules to the solute (updates system.list, and returns it) # The cell lists will be recomputed for the first solute, or if a random distribution was computed # for the previous solute - minimum_distances!(system, R, isolute; update_lists=update_lists) + minimum_distances!(system, r_chunk.R, isolute; update_lists=update_lists) update_lists = false # avoid recomputation of the cell lists in the next iteration # For each solute molecule, update the counters (this is highly suboptimal, because - # within updatecounters there are loops over solvent molecules, in such a way that + # within update_counters there are loops over solvent molecules, in such a way that # this will loop with cost nsolute*nsolvent. 
However, I cannot see an easy solution # at this point with acceptable memory requirements - updatecounters!(R, system, frame_weight) + update_counters!(r_chunk, system, frame_weight) # If this molecule was chosen as a reference molecule for the ideal gas distribution, compute it # (as many times as needed, as the reference molecules may be repeated - particularly because @@ -354,16 +384,16 @@ function mddf_frame!( # Annotate the indices of the molecules that are in the bulk solution n_solvent_in_bulk = 0 for i in eachindex(system.list) - R.autocorrelation && i == isolute && continue + r_chunk.R.autocorrelation && i == isolute && continue if inbulk(system.list[i], options) n_solvent_in_bulk += 1 buff.indices_in_bulk[n_solvent_in_bulk] = i end end for _ = 1:nrand - randomize_solvent!(system, buff, n_solvent_in_bulk, R, RNG) - minimum_distances!(system, R, isolute; update_lists=update_lists) - updatecounters!(R, system, frame_weight, Val(:random)) + randomize_solvent!(system, buff, n_solvent_in_bulk, r_chunk.R, RNG) + minimum_distances!(system, r_chunk.R, isolute; update_lists=update_lists) + update_counters!(r_chunk, system, frame_weight, Val(:random)) end # Restore positions and minimum distance list of system structure system.list .= buff.list @@ -372,50 +402,52 @@ function mddf_frame!( end # loop over solute molecules - return R + return r_chunk end #= - coordination_number_frame!(R::Result, system::AbstractParticleSystem, buff::Buffer, frame_weight) + coordination_number_frame!(r_chunk::ResultChunk, system::AbstractParticleSystem, buff::Buffer, frame_weight) -Computes the coordination numbers for a single frame. Modifies the data in the `R` (type `Result`) structure. +Computes the coordination number for a single frame. Modifies the data in the `r_chunk` (type `ResultChunk`) structure, through +the `update_volume!` and `update_counters!` functions. =# function coordination_number_frame!( - R::Result, + r_chunk::ResultChunk, system::AbstractParticleSystem, buff::Buffer, frame_weight::Float64, + sum_results_lock::Union{<:ReentrantLock,Nothing} = nothing, ) # Sum up the volume of this frame - R.volume.total += frame_weight * cell_volume(system) + upate_volume!(r_chunk, system, frame_weight) # # Compute the MDDFs for each solute molecule # update_lists = true system.ypositions .= buff.solvent_read - for isolute = 1:R.solute.nmols + for isolute = 1:r_chunk.R.solute.nmols # We need to do this one solute molecule at a time to avoid exploding the memory requirements - system.xpositions .= viewmol(isolute, buff.solute_read, R.solute) + system.xpositions .= viewmol(isolute, buff.solute_read, r_chunk.R.solute) # Compute minimum distances of the molecules to the solute (updates system.list, and returns it) # The cell lists will be recomputed for the first solute, or if a random distribution was computed # for the previous solute - minimum_distances!(system, R, isolute; update_lists=update_lists) + minimum_distances!(system, r_chunk.R, isolute; update_lists=update_lists) update_lists = false # avoid recomputation of the cell lists in the next iteration # For each solute molecule, update the counters (this is highly suboptimal, because - # within updatecounters there are loops over solvent molecules, in such a way that + # within update_counters there are loops over solvent molecules, in such a way that # this will loop with cost nsolute*nsolvent. 
However, I cannot see an easy solution # at this point with acceptable memory requirements - updatecounters!(R, system, frame_weight) + update_counters!(r_chunk, system, frame_weight) end # loop over solute molecules - return R + return r_chunk end """ @@ -440,9 +472,7 @@ This function is a wrapper around `mddf` with the `coordination_number_only` key ```julia-repl julia> trajectory = Trajectory("./trajectory.dcd",solute,solvent); -julia> results = mddf(trajectory); - -julia> coordination_numbers = coordination_number(trajectory); +julia> coordination_numbers = coordination_number(trajectory, Options(bulk_range=(10.0, 15.0)); ``` """ @@ -471,16 +501,16 @@ end water = AtomSelection(select(atoms, "resname WAT and model 1"), natomspermol=3) traj = Trajectory("$data_dir/toy/cross.pdb", protein, water, format="PDBTraj") - for lastframe in [1, 2] - options = Options( + for nthreads in [1,2], lastframe in [1, 2], low_memory in [false] + options = Options(; seed=321, StableRNG=true, - nthreads=1, + nthreads, silent=true, n_random_samples=10^5, - lastframe=lastframe, + lastframe, ) - R = mddf(traj, options) + R = mddf(traj, options; low_memory) @test R.volume.total == 27000.0 @test R.volume.domain ≈ R.volume.total - R.volume.bulk @test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.01) @@ -496,92 +526,96 @@ end atoms = readPDB("$data_dir/toy/self_monoatomic.pdb") atom = AtomSelection(select(atoms, "resname WAT and model 1"), natomspermol=1) traj = Trajectory("$data_dir/toy/self_monoatomic.pdb", atom, format="PDBTraj") - # without atoms in the bulk - options = Options( - seed=321, - StableRNG=true, - nthreads=1, - silent=true, - n_random_samples=10^5, - lastframe=1, - ) - R = mddf(traj, options) - @test R.volume.total == 27000.0 - @test R.volume.domain ≈ R.volume.total - R.volume.bulk - @test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.1) - @test R.density.solute ≈ 2 / R.volume.total - @test R.density.solvent ≈ 2 / R.volume.total - @test sum(R.md_count) ≈ 1.0 + for nthreads in [1,2], low_memory in [false, true] + options = Options(; + seed=321, + StableRNG=true, + nthreads, + silent=true, + n_random_samples=10^5, + lastframe=1, + ) + R = mddf(traj, options; low_memory) + @test R.volume.total == 27000.0 + @test R.volume.domain ≈ R.volume.total - R.volume.bulk + @test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.1) + @test R.density.solute ≈ 2 / R.volume.total + @test R.density.solvent ≈ 2 / R.volume.total + @test sum(R.md_count) ≈ 1.0 + end # # Test varying frame weights # # Read only first frame - options = Options(seed=321, StableRNG=true, nthreads=1, silent=true, lastframe=1) - R1 = mddf(traj, options) - R2 = mddf(traj, options; frame_weights=[1.0, 0.0]) - @test R1.md_count == R2.md_count - @test R1.rdf_count == R2.rdf_count - # Read only last frame - options = Options(seed=321, StableRNG=true, nthreads=1, silent=true, firstframe=2) - R1 = mddf(traj, options) - options = Options(seed=321, StableRNG=true, nthreads=1, silent=true) - R2 = mddf(traj, options; frame_weights=[0.0, 1.0]) - @test R1.md_count == R2.md_count - @test R1.rdf_count == R2.rdf_count - # Use equal weights - R1 = mddf(traj, options) - R2 = mddf(traj, options; frame_weights=[0.3, 0.3]) - @test R1.md_count == R2.md_count - @test R1.rdf_count == R2.rdf_count - # Check with the duplicated-first-frame trajectory - traj = Trajectory("$data_dir/toy/self_monoatomic_duplicated_first_frame.pdb", atom, format="PDBTraj") - options = Options(seed=321, StableRNG=true, nthreads=1, silent=true, firstframe=1, 
lastframe=3) - R1 = mddf(traj, options) - traj = Trajectory("$data_dir/toy/self_monoatomic.pdb", atom, format="PDBTraj") - options = Options(seed=321, StableRNG=true, nthreads=1, silent=true) - R2 = mddf(traj, options; frame_weights=[2.0, 1.0]) - @test R1.md_count == R2.md_count - @test R1.rdf_count == R2.rdf_count - # Test some different weights - R2 = mddf(traj, Options(silent=true), frame_weights=[2.0, 1.0]) - @test sum(R2.md_count) ≈ 2 / 3 - R2 = mddf(traj, Options(silent=true), frame_weights=[1.0, 2.0]) - @test sum(R2.md_count) ≈ 1 / 3 - - # only with atoms in the bulk - options = Options( - seed=321, - StableRNG=true, - nthreads=1, - silent=true, - n_random_samples=10^5, - firstframe=2, - ) - R = mddf(traj, options) - @test R.volume.total == 27000.0 - @test R.volume.domain ≈ R.volume.total - R.volume.bulk - @test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.1) - @test R.density.solute ≈ 2 / R.volume.total - @test R.density.solvent ≈ 2 / R.volume.total - @test R.density.solvent_bulk ≈ 1 / R.volume.bulk - - # with both frames - options = Options( - seed=321, - StableRNG=true, - nthreads=1, - silent=true, - n_random_samples=10^5, - ) - R = mddf(traj, options) - @test R.volume.total == 27000.0 - @test R.volume.domain ≈ R.volume.total - R.volume.bulk - @test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.1) - @test R.density.solute ≈ 2 / R.volume.total - @test R.density.solvent ≈ 2 / R.volume.total - @test R.density.solvent_bulk ≈ 0.5 / R.volume.bulk + for low_memory in [false, true] + options = Options(seed=321, StableRNG=true, nthreads=1, silent=true, lastframe=1) + traj = Trajectory("$data_dir/toy/self_monoatomic.pdb", atom, format="PDBTraj") + R1 = mddf(traj, options; low_memory) + R2 = mddf(traj, options; frame_weights=[1.0, 0.0], low_memory) + @test R1.md_count == R2.md_count + @test R1.rdf_count == R2.rdf_count + # Read only last frame + options = Options(seed=321, StableRNG=true, nthreads=1, silent=true, firstframe=2) + R1 = mddf(traj, options; low_memory) + options = Options(seed=321, StableRNG=true, nthreads=1, silent=true) + R2 = mddf(traj, options; frame_weights=[0.0, 1.0], low_memory) + @test R1.md_count == R2.md_count + @test R1.rdf_count == R2.rdf_count + # Use equal weights + R1 = mddf(traj, options; low_memory) + R2 = mddf(traj, options; frame_weights=[0.3, 0.3], low_memory) + @test R1.md_count == R2.md_count + @test R1.rdf_count == R2.rdf_count + # Check with the duplicated-first-frame trajectory + traj = Trajectory("$data_dir/toy/self_monoatomic_duplicated_first_frame.pdb", atom, format="PDBTraj") + options = Options(seed=321, StableRNG=true, nthreads=1, silent=true, firstframe=1, lastframe=3) + R1 = mddf(traj, options; low_memory) + traj = Trajectory("$data_dir/toy/self_monoatomic.pdb", atom, format="PDBTraj") + options = Options(seed=321, StableRNG=true, nthreads=1, silent=true) + R2 = mddf(traj, options; frame_weights=[2.0, 1.0], low_memory) + @test R1.md_count == R2.md_count + @test R1.rdf_count == R2.rdf_count + # Test some different weights + R2 = mddf(traj, Options(silent=true); frame_weights=[2.0, 1.0], low_memory) + @test sum(R2.md_count) ≈ 2 / 3 + R2 = mddf(traj, Options(silent=true); frame_weights=[1.0, 2.0], low_memory) + @test sum(R2.md_count) ≈ 1 / 3 + + # only with atoms in the bulk + options = Options( + seed=321, + StableRNG=true, + nthreads=1, + silent=true, + n_random_samples=10^5, + firstframe=2, + ) + R = mddf(traj, options; low_memory) + @test R.volume.total == 27000.0 + @test R.volume.domain ≈ R.volume.total - R.volume.bulk + 
@test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.1) + @test R.density.solute ≈ 2 / R.volume.total + @test R.density.solvent ≈ 2 / R.volume.total + @test R.density.solvent_bulk ≈ 1 / R.volume.bulk + + # with both frames + options = Options( + seed=321, + StableRNG=true, + nthreads=1, + silent=true, + n_random_samples=10^5, + ) + R = mddf(traj, options; low_memory) + @test R.volume.total == 27000.0 + @test R.volume.domain ≈ R.volume.total - R.volume.bulk + @test isapprox(R.volume.domain, (4π / 3) * R.dbulk^3; rtol=0.1) + @test R.density.solute ≈ 2 / R.volume.total + @test R.density.solvent ≈ 2 / R.volume.total + @test R.density.solvent_bulk ≈ 0.5 / R.volume.bulk + end end diff --git a/src/updatecounters.jl b/src/update_counters.jl similarity index 91% rename from src/updatecounters.jl rename to src/update_counters.jl index 91f1d5e6e..98aff6ad9 100644 --- a/src/updatecounters.jl +++ b/src/update_counters.jl @@ -35,11 +35,11 @@ function update_group_count!(group_count, ibin, iatom, frame_weight, sol::AtomSe end # -# updatecounters!(R::Result, system::AbstractParticleSystem) +# update_counters!(R::Result, system::AbstractParticleSystem) # # Function that updates the minimum-distance counters in `R` # -function updatecounters!(R::Result, system::AbstractParticleSystem, frame_weight::Float64) +function update_counters!(R::Result, system::AbstractParticleSystem, frame_weight::Float64) for md in system.list !md.within_cutoff && continue ibin = setbin(md.d, R.files[1].options.binstep) @@ -62,7 +62,7 @@ function updatecounters!(R::Result, system::AbstractParticleSystem, frame_weight return R end # Update counters for the ideal gas distributions -function updatecounters!(R::Result, system::AbstractParticleSystem, frame_weight::Float64, ::Val{:random}) +function update_counters!(R::Result, system::AbstractParticleSystem, frame_weight::Float64, ::Val{:random}) for md in system.list !md.within_cutoff && continue ibin = setbin(md.d, R.files[1].options.binstep) From 3e08c849343d210aac0ed63226e6cbc11f22a6ad Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Tue, 25 Jun 2024 13:38:44 -0300 Subject: [PATCH 02/10] update allocation test --- test/allocations.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/allocations.jl b/test/allocations.jl index 87bdd6c60..63d7a041f 100644 --- a/test/allocations.jl +++ b/test/allocations.jl @@ -60,8 +60,9 @@ @. buff.solute_read = traj.x_solute @. 
buff.solvent_read = traj.x_solvent ComplexMixtures.update_unitcell!(system, ComplexMixtures.convert_unitcell(ComplexMixtures.getunitcell(traj))) + r_chunk = ComplexMixtures.ResultChunk(R, nothing) t_mddf_frame = - @benchmark ComplexMixtures.mddf_frame!($R, $system, $buff, $options, 1.0, $RNG) samples = 1 evals = 1 + @benchmark ComplexMixtures.mddf_frame!($r_chunk, $system, $buff, $options, 1.0, $RNG) samples = 1 evals = 1 @test t_mddf_frame.allocs < 100 end From cfa6d115e53826dc31f6ecb93062ad3900aab585 Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Tue, 25 Jun 2024 13:49:24 -0300 Subject: [PATCH 03/10] adjust tests --- src/mddf.jl | 141 +++++++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/src/mddf.jl b/src/mddf.jl index f38d6bc07..019a57ed5 100644 --- a/src/mddf.jl +++ b/src/mddf.jl @@ -501,7 +501,7 @@ end water = AtomSelection(select(atoms, "resname WAT and model 1"), natomspermol=3) traj = Trajectory("$data_dir/toy/cross.pdb", protein, water, format="PDBTraj") - for nthreads in [1,2], lastframe in [1, 2], low_memory in [false] + for nthreads in [1,2], lastframe in [1, 2], low_memory in [true, false] options = Options(; seed=321, StableRNG=true, @@ -624,81 +624,86 @@ end using PDBTools: readPDB, select using ComplexMixtures.Testing: data_dir, pdbfile - options = Options( - seed=1, - stride=1, - StableRNG=true, - nthreads=1, - n_random_samples=100, - silent=true - ) atoms = readPDB(pdbfile) protein = AtomSelection(select(atoms, "protein"), nmols=1) tmao = AtomSelection(select(atoms, "resname TMAO"), natomspermol=14) - - # Test actual system: cross correlation traj = Trajectory("$data_dir/NAMD/trajectory.dcd", protein, tmao) - R = mddf(traj, options) - # Deterministic - @test R.volume.total ≈ 603078.4438609097 - @test sum(R.md_count) ≈ 25.250000000000007 - @test sum(R.rdf_count) ≈ 19.550000000000004 - @test R.density.solute ≈ 1.6581590839128614e-6 - @test R.density.solvent ≈ 0.00030012679418822794 - # Dependent on the random number seed - @test R.volume.domain ≈ 74560.15401932324 rtol = 0.1 - @test R.volume.bulk ≈ 528518.2898415865 rtol = 0.1 - @test R.density.solvent_bulk ≈ 0.0003054766563488117 rtol = 0.1 - @test sum(R.mddf) ≈ 527.670164155438 rtol = 0.1 - @test sum(R.rdf) ≈ 444.71561185073836 rtol = 0.1 - @test R.kb[end] ≈ -5775.215792514756 rtol = 0.5 - @test R.kb_rdf[end] ≈ -6360.471034166915 rtol = 0.5 # Throw error in incorrect call to coordination_number - @test_throws ArgumentError coordination_number(traj, options, coordination_number_only=true) - + @test_throws ArgumentError coordination_number(traj, Options(); coordination_number_only=true) + # Throw insufficent memory error @test_throws ErrorException mddf(traj, Options(nthreads=10^10)) - # Self correlation - traj = Trajectory("$data_dir/NAMD/trajectory.dcd", tmao) - R = mddf(traj, options) - # Deterministic - @test R.volume.total ≈ 603078.4438609097 - @test sum(R.md_count) ≈ 2.8939226519337016 - @test sum(R.rdf_count) ≈ 1.858839779005525 - @test R.density.solute ≈ 0.00030012679418822794 - @test R.density.solvent ≈ 0.00030012679418822794 - # Dependent on the random number seed - @test R.volume.domain ≈ 6958.855154995052 rtol = 0.1 - @test R.volume.bulk ≈ 596277.0591884783 rtol = 0.1 - @test R.density.solvent_bulk ≈ 0.00029875568324470034 rtol = 0.1 - @test sum(R.mddf) ≈ 434.7773107740875 rtol = 0.1 - @test sum(R.rdf) ≈ 345.9169130954568 rtol = 0.1 - @test R.kb[end] ≈ -476 rtol = 0.5 - @test R.kb_rdf[end] ≈ -421 rtol = 0.5 - - # Test varying frame weights: the 
trajectory below has 3 frames - # extracted from NAMD/trajectory.dcd, and the 2 first frames are the - # first frame duplicated. - traj1 = Trajectory("$data_dir/NAMD/traj_duplicated_first_frame.dcd", tmao) - R1 = mddf(traj1, Options()) - traj2 = Trajectory("$data_dir/NAMD/trajectory.dcd", tmao) - R2 = mddf(traj2, Options(lastframe=2), frame_weights=[2.0, 1.0]) - @test R2.md_count ≈ R1.md_count - @test all(R2.solute_group_count == R1.solute_group_count) - @test all(R2.solvent_group_count == R1.solvent_group_count) - R2 = mddf(traj2, Options(lastframe=2), frame_weights=[0.5, 0.25]) - @test R2.md_count ≈ R1.md_count - @test all(R2.solute_group_count == R1.solute_group_count) - @test all(R2.solvent_group_count == R1.solvent_group_count) - @test R2.volume.total ≈ R1.volume.total - # Varying weights with stride - R1 = mddf(traj1, Options(firstframe=2, lastframe=3)) - R2 = mddf(traj1, Options(lastframe=3, stride=2), frame_weights=[1.0, 100.0, 1.0]) - @test R2.md_count ≈ R1.md_count - @test all(R2.solute_group_count == R1.solute_group_count) - @test all(R2.solvent_group_count == R1.solvent_group_count) - @test R2.volume.total ≈ R1.volume.total + for nthreads in [1,2], low_memory in [true, false] + options = Options(; + seed=1, + stride=1, + StableRNG=true, + nthreads, + n_random_samples=100, + silent=true + ) + # Test actual system: cross correlation + traj = Trajectory("$data_dir/NAMD/trajectory.dcd", protein, tmao) + R = mddf(traj, options; low_memory) + # Deterministic + @test R.volume.total ≈ 603078.4438609097 + @test sum(R.md_count) ≈ 25.250000000000007 + @test sum(R.rdf_count) ≈ 19.550000000000004 + @test R.density.solute ≈ 1.6581590839128614e-6 + @test R.density.solvent ≈ 0.00030012679418822794 + # Dependent on the random number seed + @test R.volume.domain ≈ 74560.15401932324 rtol = 0.1 + @test R.volume.bulk ≈ 528518.2898415865 rtol = 0.1 + @test R.density.solvent_bulk ≈ 0.0003054766563488117 rtol = 0.1 + @test sum(R.mddf) ≈ 527.670164155438 rtol = 0.1 + @test sum(R.rdf) ≈ 444.71561185073836 rtol = 0.1 + @test R.kb[end] ≈ -5775.215792514756 rtol = 0.5 + @test R.kb_rdf[end] ≈ -6360.471034166915 rtol = 0.5 + + # Self correlation + traj = Trajectory("$data_dir/NAMD/trajectory.dcd", tmao) + R = mddf(traj, options; low_memory) + # Deterministic + @test R.volume.total ≈ 603078.4438609097 + @test sum(R.md_count) ≈ 2.8939226519337016 + @test sum(R.rdf_count) ≈ 1.858839779005525 + @test R.density.solute ≈ 0.00030012679418822794 + @test R.density.solvent ≈ 0.00030012679418822794 + # Dependent on the random number seed + if nthreads == 1 + @test R.volume.domain ≈ 6958.855154995052 rtol = 0.1 + @test R.volume.bulk ≈ 596277.0591884783 rtol = 0.1 + @test R.density.solvent_bulk ≈ 0.00029875568324470034 rtol = 0.1 + @test sum(R.mddf) ≈ 434.7773107740875 rtol = 0.1 + @test sum(R.rdf) ≈ 345.9169130954568 rtol = 0.1 + @test R.kb[end] ≈ -476 rtol = 0.5 + @test R.kb_rdf[end] ≈ -421 rtol = 0.5 + end + + # Test varying frame weights: the trajectory below has 3 frames + # extracted from NAMD/trajectory.dcd, and the 2 first frames are the + # first frame duplicated. 
+ traj1 = Trajectory("$data_dir/NAMD/traj_duplicated_first_frame.dcd", tmao) + R1 = mddf(traj1, Options(); low_memory) + traj2 = Trajectory("$data_dir/NAMD/trajectory.dcd", tmao) + R2 = mddf(traj2, Options(lastframe=2); frame_weights=[2.0, 1.0], low_memory) + @test R2.md_count ≈ R1.md_count + @test all(R2.solute_group_count == R1.solute_group_count) + @test all(R2.solvent_group_count == R1.solvent_group_count) + R2 = mddf(traj2, Options(lastframe=2); frame_weights=[0.5, 0.25], low_memory) + @test R2.md_count ≈ R1.md_count + @test all(R2.solute_group_count == R1.solute_group_count) + @test all(R2.solvent_group_count == R1.solvent_group_count) + @test R2.volume.total ≈ R1.volume.total + # Varying weights with stride + R1 = mddf(traj1, Options(firstframe=2, lastframe=3); low_memory) + R2 = mddf(traj1, Options(lastframe=3, stride=2); frame_weights=[1.0, 100.0, 1.0], low_memory) + @test R2.md_count ≈ R1.md_count + @test all(R2.solute_group_count == R1.solute_group_count) + @test all(R2.solvent_group_count == R1.solvent_group_count) + @test R2.volume.total ≈ R1.volume.total + end end From 50e0c8c44784d10ae9214e3cae5ac1d9d1270ee8 Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Tue, 25 Jun 2024 13:57:59 -0300 Subject: [PATCH 04/10] do not error if low_memory is set --- src/mddf.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mddf.jl b/src/mddf.jl index 019a57ed5..b3df28977 100644 --- a/src/mddf.jl +++ b/src/mddf.jl @@ -228,7 +228,7 @@ function mddf( # Check if the system free memory is enough, if not throw an error required_memory = Base.summarysize(R) * nchunks / 1024^3 total_memory = Sys.total_memory() / 1024^3 - if mem_warn && (required_memory > 0.5 * total_memory) + if !low_memory && mem_warn && (required_memory > 0.5 * total_memory) @warn begin """\n The memory required for the computation is a large proportion of the total system memory. @@ -251,7 +251,7 @@ function mddf( """ end _file = nothing _line = nothing end - if required_memory > total_memory + if !low_memory && (required_memory > total_memory) throw(ErrorException("The memory required for the computation is larger than the total system memory.")) end From a72cc382d344f1bf2b2bf3403e85bf5c3e731e0c Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Tue, 25 Jun 2024 16:40:45 -0300 Subject: [PATCH 05/10] change low_memory strategy --- src/mddf.jl | 107 ++++++++++++++++----------------------- src/minimum_distances.jl | 15 ++++-- 2 files changed, 56 insertions(+), 66 deletions(-) diff --git a/src/mddf.jl b/src/mddf.jl index b3df28977..043e00387 100644 --- a/src/mddf.jl +++ b/src/mddf.jl @@ -49,27 +49,6 @@ end end end -# -# Structure to carry the results of the computation -# in threaded chunks: carries the information if the -# results must be updated atomically or not. The locked -# update is used in the `low_memory` mode. -# -struct ResultChunk{L<:Union{ReentrantLock,Nothing}} - R::Result - lock::L -end -# unlocked updates -update_volume!(r_chunk::ResultChunk{Nothing}, system, frame_weight) = - r_chunk.R.volume.total += frame_weight * cell_volume(system) -update_counters!(r_chunk::ResultChunk{Nothing}, args...) = - update_counters!(r_chunk.R, args...) -# locked updates -update_volume!(r_chunk::ResultChunk{ReentrantLock}, system, frame_weight) = - @lock r_chunk.lock r_chunk.R.volume.total += frame_weight * cell_volume(system) -update_counters!(r_chunk::ResultChunk{ReentrantLock}, args...) = - @lock r_chunk.lock update_counters!(r_chunk.R, args...) 
- # # Defines is a molecule is a bulk molecule # @@ -189,12 +168,6 @@ function mddf( # Set random number generator RNG = init_random(options) - # Number of threads (chunks) to use - nchunks = options.nthreads - if nchunks == 0 - nchunks = Threads.nthreads() - end - # Compute some meta-data from the trajectory file and the options, # to allow parallel creation of the Result, and ParticleSystem # data structures @@ -215,6 +188,9 @@ function mddf( iframe += 1 end + # Default number of chunks for !low_memory run + nchunks = options.nthreads == 0 ? Threads.nthreads() : options.nthreads + # Print some information about this run if !options.silent title(R, trajectory.solute, trajectory.solvent, nchunks) @@ -228,7 +204,7 @@ function mddf( # Check if the system free memory is enough, if not throw an error required_memory = Base.summarysize(R) * nchunks / 1024^3 total_memory = Sys.total_memory() / 1024^3 - if !low_memory && mem_warn && (required_memory > 0.5 * total_memory) + if mem_warn && (required_memory > 0.5 * total_memory) @warn begin """\n The memory required for the computation is a large proportion of the total system memory. @@ -251,22 +227,34 @@ function mddf( """ end _file = nothing _line = nothing end - if !low_memory && (required_memory > total_memory) + if required_memory > total_memory throw(ErrorException("The memory required for the computation is larger than the total system memory.")) end + # + # Number of batches of parallel CellListMap minimum-distance calculation + # + parallel_cl, nbatches_cl = if low_memory + true, (min(8, max(1,fld(Threads.nthreads(), nchunks))), max(1,fld(Threads.nthreads(), nchunks))) + else + false, (1,1) + end + if low_memory && !options.silent + println("Running with low-memory option. Parallel CellListMap will be used.") + println(" Number of parallel minimum-distance computations: $nchunks") + println(" Each computation will use $(nbatches_cl[2]) threads.") + end + # Loop over the trajectory read_lock = ReentrantLock() sum_results_lock = ReentrantLock() Threads.@threads for frame_range in ChunkSplitters.chunks(to_read_frames; n=nchunks) # Local data structures for this chunk - system_chunk = ParticleSystem(trajectory, trajectory_data.unitcell, options) + system_chunk = ParticleSystem( + trajectory, trajectory_data.unitcell, options, parallel_cl, nbatches_cl + ) buff_chunk = Buffer(trajectory, R) - if low_memory - R_chunk = ResultChunk(R, sum_results_lock) # has to be updated with locks: slow but memory efficient - else - R_chunk = ResultChunk(Result(trajectory, options; trajectory_data, frame_weights), nothing) - end + r_chunk = Result(trajectory, options; trajectory_data, frame_weights) # Reset the number of frames read by each chunk nframes_read = 0 for _ in frame_range @@ -284,7 +272,7 @@ function mddf( unitcell = convert_unitcell(trajectory_data.unitcell, getunitcell(trajectory)) update_unitcell!(system_chunk, unitcell) # Read weight of this frame. 
- frame_weight = R_chunk.R.files[1].frame_weights[iframe] + frame_weight = r_chunk.files[1].frame_weights[iframe] # Display progress bar options.silent || next!(progress) end @@ -296,20 +284,15 @@ function mddf( nframes_read += 1 # Compute distances in this frame and update results if !coordination_number_only - mddf_frame!(R_chunk, system_chunk, buff_chunk, options, frame_weight, RNG) + mddf_frame!(r_chunk, system_chunk, buff_chunk, options, frame_weight, RNG) else - coordination_number_frame!(R_chunk, system_chunk, buff_chunk, frame_weight) + coordination_number_frame!(r_chunk, system_chunk, buff_chunk, frame_weight) end end # compute if end # frame range for this chunk # Sum the results of this chunk into the total results data structure - if !low_memory - R_chunk.R.files[1].nframes_read = nframes_read - @lock sum_results_lock sum!(R, R_chunk.R) - else - @lock sum_results_lock R.files[1].nframes_read += nframes_read - end + @lock sum_results_lock sum!(R, r_chunk) end closetraj!(trajectory) @@ -325,16 +308,17 @@ end # Compute cell volume from unitcell matrix cell_volume(system::AbstractParticleSystem) = @views dot(cross(system.unitcell[:, 1], system.unitcell[:, 2]), system.unitcell[:, 3]) +update_volume!(r_chunk, system, frame_weight) = r_chunk.volume.total += frame_weight * cell_volume(system) #= - mddf_frame!(r_chunk::ResultChunk, system::AbstractParticleSystem, buff::Buffer, options::Options, frame_weight, RNG) + mddf_frame!(r_chunk::Result, system::AbstractParticleSystem, buff::Buffer, options::Options, frame_weight, RNG) -Computes the MDDF for a single frame. Modifies the data in the `r_chunk` (type `ResultChunk`) structure, through +Computes the MDDF for a single frame. Modifies the data in the `r_chunk` (type `Result`) structure, through the `update_volume!` and `update_counters!` functions. 
=# function mddf_frame!( - r_chunk::ResultChunk, + r_chunk::Result, system::AbstractParticleSystem, buff::Buffer, options::Options, @@ -347,7 +331,7 @@ function mddf_frame!( # Random set of solute molecules to use as reference for the ideal gas distributions for i in eachindex(buff.ref_solutes) - buff.ref_solutes[i] = rand(RNG, 1:r_chunk.R.solute.nmols) + buff.ref_solutes[i] = rand(RNG, 1:r_chunk.solute.nmols) end # @@ -355,15 +339,15 @@ function mddf_frame!( # update_lists = true system.ypositions .= buff.solvent_read - for isolute = 1:r_chunk.R.solute.nmols + for isolute = 1:r_chunk.solute.nmols # We need to do this one solute molecule at a time to avoid exploding the memory requirements - system.xpositions .= viewmol(isolute, buff.solute_read, r_chunk.R.solute) + system.xpositions .= viewmol(isolute, buff.solute_read, r_chunk.solute) # Compute minimum distances of the molecules to the solute (updates system.list, and returns it) # The cell lists will be recomputed for the first solute, or if a random distribution was computed # for the previous solute - minimum_distances!(system, r_chunk.R, isolute; update_lists=update_lists) + minimum_distances!(system, r_chunk, isolute; update_lists=update_lists) update_lists = false # avoid recomputation of the cell lists in the next iteration # For each solute molecule, update the counters (this is highly suboptimal, because @@ -384,15 +368,15 @@ function mddf_frame!( # Annotate the indices of the molecules that are in the bulk solution n_solvent_in_bulk = 0 for i in eachindex(system.list) - r_chunk.R.autocorrelation && i == isolute && continue + r_chunk.autocorrelation && i == isolute && continue if inbulk(system.list[i], options) n_solvent_in_bulk += 1 buff.indices_in_bulk[n_solvent_in_bulk] = i end end for _ = 1:nrand - randomize_solvent!(system, buff, n_solvent_in_bulk, r_chunk.R, RNG) - minimum_distances!(system, r_chunk.R, isolute; update_lists=update_lists) + randomize_solvent!(system, buff, n_solvent_in_bulk, r_chunk, RNG) + minimum_distances!(system, r_chunk, isolute; update_lists=update_lists) update_counters!(r_chunk, system, frame_weight, Val(:random)) end # Restore positions and minimum distance list of system structure @@ -406,37 +390,36 @@ function mddf_frame!( end #= - coordination_number_frame!(r_chunk::ResultChunk, system::AbstractParticleSystem, buff::Buffer, frame_weight) + coordination_number_frame!(r_chunk::Result, system::AbstractParticleSystem, buff::Buffer, frame_weight) -Computes the coordination number for a single frame. Modifies the data in the `r_chunk` (type `ResultChunk`) structure, through +Computes the coordination number for a single frame. Modifies the data in the `r_chunk` (type `Result`) structure, through the `update_volume!` and `update_counters!` functions. 
=# function coordination_number_frame!( - r_chunk::ResultChunk, + r_chunk::Result, system::AbstractParticleSystem, buff::Buffer, frame_weight::Float64, - sum_results_lock::Union{<:ReentrantLock,Nothing} = nothing, ) # Sum up the volume of this frame - upate_volume!(r_chunk, system, frame_weight) + update_volume!(r_chunk, system, frame_weight) # # Compute the MDDFs for each solute molecule # update_lists = true system.ypositions .= buff.solvent_read - for isolute = 1:r_chunk.R.solute.nmols + for isolute = 1:r_chunk.solute.nmols # We need to do this one solute molecule at a time to avoid exploding the memory requirements - system.xpositions .= viewmol(isolute, buff.solute_read, r_chunk.R.solute) + system.xpositions .= viewmol(isolute, buff.solute_read, r_chunk.solute) # Compute minimum distances of the molecules to the solute (updates system.list, and returns it) # The cell lists will be recomputed for the first solute, or if a random distribution was computed # for the previous solute - minimum_distances!(system, r_chunk.R, isolute; update_lists=update_lists) + minimum_distances!(system, r_chunk, isolute; update_lists=update_lists) update_lists = false # avoid recomputation of the cell lists in the next iteration # For each solute molecule, update the counters (this is highly suboptimal, because diff --git a/src/minimum_distances.jl b/src/minimum_distances.jl index 63c22f174..8917e2be2 100644 --- a/src/minimum_distances.jl +++ b/src/minimum_distances.jl @@ -164,16 +164,23 @@ will be setup such that `xpositions` corresponds to one molecule of the solute, `ypositions` contains all coordinates of all atoms of the solvent. =# -function CellListMap.ParticleSystem(trajectory::Trajectory, unitcell, options::Options) - system = ParticleSystem( +function CellListMap.ParticleSystem( + trajectory::Trajectory, + unitcell, + options::Options, + parallel::Bool, + nbatches::Tuple{Int,Int}, +) + system = ParticleSystem(; xpositions = zeros(SVector{3,Float64}, trajectory.solute.natomspermol), ypositions = zeros(SVector{3,Float64}, trajectory.solvent.nmols * trajectory.solvent.natomspermol), - unitcell = unitcell, + unitcell, cutoff = options.usecutoff ? 
options.cutoff : options.dbulk, output = fill(zero(MinimumDistance), trajectory.solvent.nmols), output_name = :list, lcell = options.lcell, - parallel = false, # Important: parallellization is performed at the frame level + parallel, # true only if low_memory is set + nbatches, autoswap = false, # The lists will be built for the solvent, always ) return system From e72bcd666fe777b5a3ff20677054bc666ac1370c Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Wed, 26 Jun 2024 11:31:49 -0300 Subject: [PATCH 06/10] split parallel setup in new function/file --- src/ComplexMixtures.jl | 3 +++ src/parallel_setup.jl | 59 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 src/parallel_setup.jl diff --git a/src/ComplexMixtures.jl b/src/ComplexMixtures.jl index a97d025d3..b29afe889 100644 --- a/src/ComplexMixtures.jl +++ b/src/ComplexMixtures.jl @@ -69,6 +69,9 @@ include("./minimum_distances.jl") # Update counters routines include("./update_counters.jl") +# Parallel steup function +include("./parallel_setup.jl") + # Main function include("./mddf.jl") diff --git a/src/parallel_setup.jl b/src/parallel_setup.jl new file mode 100644 index 000000000..bdc61add9 --- /dev/null +++ b/src/parallel_setup.jl @@ -0,0 +1,59 @@ +#= + +This function will set the number of chunks and the parameters for the CellListMap +parallelization, given the size of the results data structure and the level of parallelization. + +=# +function parallel_setup(options::Options, R::Result, low_memory::Bool, mem_warn::Bool) + + # Number of threads available + nthreads = options.nthreads == 0 ? Threads.nthreads() : options.nthreads + + # Memory requirements, given the size of the results data structure and + # the level of parallelization + total_memory = Sys.total_memory() / 1024^3 + results_memory = Base.summarysize(R) / 1024^3 + required_memory = results_memory * nthreads + + # + # Set the number of chunks and CellListMap parameters for low-memory option + # + nchunks, parallel_cl, nbatches_cl = if low_memory + nchunks = max(1, min(nthreads, Int(fld(0.2 * total_memory, results_memory)))) + nthreads_per_chunk = Int(fld(nthreads, nchunks)) + (nchunks, true, (min(8,nthreads_per_chunk), nthreads_per_chunk)) + else + (nthreads, false, (1,1)) + end + + if mem_warn && (nchunks * results_memory > 0.5 * total_memory) + @warn begin + """\n + The memory required for the computation is a large proportion of the total system's memory. + Depending on resources used by other processes, this may lead to memory exhaustion and + and the termination of the computation. + + - The Results data structure is $(round(results_memory, digits=2)) GiB, and $nchunks copies are required + for the parallel computation, thus requiring $(round(required_memory, digits=2)) GiB. + - The total system memory is: $(round(total_memory, digits=2)) GiB. + + To reduce memory requirements, consider: + + - Use the `low_memory = true` option in the call to `mddf`. + For example: `mddf(traj, options; low_memory = true)` + - Reducing the number of threads, with the `Options(nthreads=N)` parameter. + Here, we suggest at most N=$(Int(fld(0.2 * total_memory, results_memory))). + - Using the predefinition of custom groups of atoms in the solute and solvent. + See: https://m3g.github.io/ComplexMixtures.jl/stable/selection/#predefinition-of-groups + + To suppress this warning, set `mem_warn = false` in the call to `mddf`. 
+ + """ + end _file = nothing _line = nothing + end + if nchunks * results_memory > total_memory + throw(ErrorException("The memory required for the computation is larger than the total system memory.")) + end + + return nchunks, parallel_cl, nbatches_cl, nthreads +end From aa69ca7c7637306658b8a98a979336503dc742d0 Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Wed, 26 Jun 2024 11:32:10 -0300 Subject: [PATCH 07/10] redefine the parallel steup for low_memory --- src/mddf.jl | 149 ++++++++++++++++++++-------------------------------- 1 file changed, 56 insertions(+), 93 deletions(-) diff --git a/src/mddf.jl b/src/mddf.jl index 043e00387..b579ed731 100644 --- a/src/mddf.jl +++ b/src/mddf.jl @@ -188,12 +188,19 @@ function mddf( iframe += 1 end - # Default number of chunks for !low_memory run - nchunks = options.nthreads == 0 ? Threads.nthreads() : options.nthreads + # Define how the parallelization will be performed, according to the memory + # requirements of the computation + nchunks, parallel_cl, nbatches_cl, nthreads = parallel_setup(options, R, low_memory, mem_warn) # Print some information about this run if !options.silent - title(R, trajectory.solute, trajectory.solvent, nchunks) + title(R, trajectory.solute, trajectory.solvent, nthreads) + if low_memory + println("Running with low-memory option:") + println(" - Parallel CellListMap will be used.") + println(" - Number of parallel minimum-distance computations: $nchunks") + println(" - Each minimum-distance computation will use $(nbatches_cl[2]) threads.") + end progress = Progress(R.files[1].nframes_read; dt=1) end @@ -201,100 +208,56 @@ function mddf( to_read_frames = options.firstframe:R.files[1].lastframe_read to_compute_frames = options.firstframe:options.stride:R.files[1].lastframe_read - # Check if the system free memory is enough, if not throw an error - required_memory = Base.summarysize(R) * nchunks / 1024^3 - total_memory = Sys.total_memory() / 1024^3 - if mem_warn && (required_memory > 0.5 * total_memory) - @warn begin - """\n - The memory required for the computation is a large proportion of the total system memory. - Depending on resources used by other processes, this may lead to memory exhaustion and - and the termination of the computation. - - - The Results data structure is $(round(required_memory/nchunks, digits=2)) GiB, and $nchunks copies are required - for the parallel computation, thus requiring $(round(required_memory, digits=2)) GiB. - - The total system memory is: $(round(total_memory, digits=2)) GiB. - - To reduce memory requirements, consider: - - - Reducing the number of threads, with the `Options(nthreads=N)` parameter. - Here, we suggest at most N=$(Int(fld(0.5 * total_memory, required_memory/nchunks))). - - Using the predefinition of custom groups of atoms in the solute and solvent. - See: https://m3g.github.io/ComplexMixtures.jl/stable/selection/#predefinition-of-groups - - To suppress this warning, set `mem_warn = false` in the call to `mddf`. - - """ - end _file = nothing _line = nothing - end - if required_memory > total_memory - throw(ErrorException("The memory required for the computation is larger than the total system memory.")) - end - - # - # Number of batches of parallel CellListMap minimum-distance calculation - # - parallel_cl, nbatches_cl = if low_memory - true, (min(8, max(1,fld(Threads.nthreads(), nchunks))), max(1,fld(Threads.nthreads(), nchunks))) - else - false, (1,1) - end - if low_memory && !options.silent - println("Running with low-memory option. 
Parallel CellListMap will be used.") - println(" Number of parallel minimum-distance computations: $nchunks") - println(" Each computation will use $(nbatches_cl[2]) threads.") - end - # Loop over the trajectory read_lock = ReentrantLock() sum_results_lock = ReentrantLock() - Threads.@threads for frame_range in ChunkSplitters.chunks(to_read_frames; n=nchunks) - # Local data structures for this chunk - system_chunk = ParticleSystem( - trajectory, trajectory_data.unitcell, options, parallel_cl, nbatches_cl - ) - buff_chunk = Buffer(trajectory, R) - r_chunk = Result(trajectory, options; trajectory_data, frame_weights) - # Reset the number of frames read by each chunk - nframes_read = 0 - for _ in frame_range - local compute, frame_weight - # Read frame coordinates - @lock read_lock begin - iframe, compute = goto_nextframe!(iframe, R, trajectory, to_compute_frames, options) + @sync for frame_range in ChunkSplitters.chunks(to_read_frames; n=nchunks) + Threads.@spawn begin + # Local data structures for this chunk + system_chunk = ParticleSystem( + trajectory, trajectory_data.unitcell, options, parallel_cl, nbatches_cl + ) + buff_chunk = Buffer(trajectory, R) + r_chunk = Result(trajectory, options; trajectory_data, frame_weights) + # Reset the number of frames read by each chunk + nframes_read = 0 + for _ in frame_range + local compute, frame_weight + # Read frame coordinates + @lock read_lock begin + iframe, compute = goto_nextframe!(iframe, R, trajectory, to_compute_frames, options) + if compute + # Read frame for computing + # The solute coordinates must be read in intermediate arrays, because the + # solute molecules will be considered one at a time in the computation of the + # minimum distances + @. buff_chunk.solute_read = trajectory.x_solute + @. buff_chunk.solvent_read = trajectory.x_solvent + unitcell = convert_unitcell(trajectory_data.unitcell, getunitcell(trajectory)) + update_unitcell!(system_chunk, unitcell) + # Read weight of this frame. + frame_weight = r_chunk.files[1].frame_weights[iframe] + # Display progress bar + options.silent || next!(progress) + end + end # release reading lock + # + # Perform MDDF computation + # if compute - # Read frame for computing - # The solute coordinates must be read in intermediate arrays, because the - # solute molecules will be considered one at a time in the computation of the - # minimum distances - @. buff_chunk.solute_read = trajectory.x_solute - @. buff_chunk.solvent_read = trajectory.x_solvent - unitcell = convert_unitcell(trajectory_data.unitcell, getunitcell(trajectory)) - update_unitcell!(system_chunk, unitcell) - # Read weight of this frame. 
- frame_weight = r_chunk.files[1].frame_weights[iframe] - # Display progress bar - options.silent || next!(progress) - end - end # release reading lock - # - # Perform MDDF computation - # - if compute - nframes_read += 1 - # Compute distances in this frame and update results - if !coordination_number_only - mddf_frame!(r_chunk, system_chunk, buff_chunk, options, frame_weight, RNG) - else - coordination_number_frame!(r_chunk, system_chunk, buff_chunk, frame_weight) - end - end # compute if - end # frame range for this chunk - - # Sum the results of this chunk into the total results data structure - @lock sum_results_lock sum!(R, r_chunk) - - end + nframes_read += 1 + # Compute distances in this frame and update results + if !coordination_number_only + mddf_frame!(r_chunk, system_chunk, buff_chunk, options, frame_weight, RNG) + else + coordination_number_frame!(r_chunk, system_chunk, buff_chunk, frame_weight) + end + end # compute if + end # frame range for this chunk + # Sum the results of this chunk into the total results data structure + @lock sum_results_lock sum!(R, r_chunk) + end # spawn + end # chunk loop closetraj!(trajectory) # Setup the final data structure with final values averaged over the number of frames, From 6b238b48b08e13c3375a18e30eb05de88cdf814b Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Wed, 26 Jun 2024 11:36:05 -0300 Subject: [PATCH 08/10] remove mem_warn option --- src/mddf.jl | 12 ++++++------ src/parallel_setup.jl | 10 ++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/mddf.jl b/src/mddf.jl index b579ed731..8da7b0616 100644 --- a/src/mddf.jl +++ b/src/mddf.jl @@ -111,8 +111,8 @@ end trajectory::Trajectory, options::Options; frame_weights = Float64[], - coordination_number_only = false - mem_warn = true + coordination_number_only = false, + low_memory = false, ) Computes the minimum-distance distribution function, atomic contributions, and @@ -134,8 +134,9 @@ site-counts and coordination numbers of the solvent molecules around the solute, This is useful when the normalization of the distribution is not possible or needed, for instance when the bulk solutio is not properly defined. The computation is much faster in this case. -The `mem_warn` keyword is a boolean that, if set to `true`, will issue a warning if the memory required for the computation -is larger than 50% of the total system memory. This is to avoid memory exhaustion and the termination of the computation. +The `low_memory` can be set to `true` to reduce the memory requirements of the computation. This will +parallelize the computation of the minimum distances at a lower level, reducing the memory +requirements at the expense of some performance. 
### Examples @@ -158,7 +159,6 @@ function mddf( trajectory::Trajectory, options::Options=Options(); frame_weights=Float64[], coordination_number_only=false, - mem_warn=true, low_memory=false, ) @@ -190,7 +190,7 @@ function mddf( # Define how the parallelization will be performed, according to the memory # requirements of the computation - nchunks, parallel_cl, nbatches_cl, nthreads = parallel_setup(options, R, low_memory, mem_warn) + nchunks, parallel_cl, nbatches_cl, nthreads = parallel_setup(options, R, low_memory) # Print some information about this run if !options.silent diff --git a/src/parallel_setup.jl b/src/parallel_setup.jl index bdc61add9..833d8e359 100644 --- a/src/parallel_setup.jl +++ b/src/parallel_setup.jl @@ -4,7 +4,7 @@ This function will set the number of chunks and the parameters for the CellListM parallelization, given the size of the results data structure and the level of parallelization. =# -function parallel_setup(options::Options, R::Result, low_memory::Bool, mem_warn::Bool) +function parallel_setup(options::Options, R::Result, low_memory::Bool) # Number of threads available nthreads = options.nthreads == 0 ? Threads.nthreads() : options.nthreads @@ -26,7 +26,7 @@ function parallel_setup(options::Options, R::Result, low_memory::Bool, mem_warn: (nthreads, false, (1,1)) end - if mem_warn && (nchunks * results_memory > 0.5 * total_memory) + if nchunks * results_memory > 0.5 * total_memory @warn begin """\n The memory required for the computation is a large proportion of the total system's memory. @@ -41,13 +41,11 @@ function parallel_setup(options::Options, R::Result, low_memory::Bool, mem_warn: - Use the `low_memory = true` option in the call to `mddf`. For example: `mddf(traj, options; low_memory = true)` - - Reducing the number of threads, with the `Options(nthreads=N)` parameter. - Here, we suggest at most N=$(Int(fld(0.2 * total_memory, results_memory))). - Using the predefinition of custom groups of atoms in the solute and solvent. See: https://m3g.github.io/ComplexMixtures.jl/stable/selection/#predefinition-of-groups + - Reducing the number of threads, with the `Options(nthreads=N)` parameter. + Here, we suggest at most N=$(Int(fld(0.2 * total_memory, results_memory))). - To suppress this warning, set `mem_warn = false` in the call to `mddf`. - """ end _file = nothing _line = nothing end From 462070aa43da76e1da06b43cfe1edb8cf1e9f351 Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Wed, 26 Jun 2024 11:42:15 -0300 Subject: [PATCH 09/10] fix tests --- src/minimum_distances.jl | 5 ++--- test/allocations.jl | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/minimum_distances.jl b/src/minimum_distances.jl index 8917e2be2..f180a9228 100644 --- a/src/minimum_distances.jl +++ b/src/minimum_distances.jl @@ -157,7 +157,6 @@ function minimum_distances!( end #= - ParticleSystem(trajectory::Trajectory, options::Options) Setup the periodic system from CellListMap, to compute minimimum distances. 
The system will be setup such that `xpositions` corresponds to one molecule of the solute, and @@ -201,7 +200,7 @@ end protein = AtomSelection(select(atoms, "protein"), nmols = 1) traj = Trajectory("$(Testing.data_dir)/NAMD/trajectory.dcd", protein, tmao) tmeta = ComplexMixtures.TrajectoryMetaData(traj, options) - system = ComplexMixtures.ParticleSystem(traj, tmeta.unitcell, options) + system = ComplexMixtures.ParticleSystem(traj, tmeta.unitcell, options, false, (1,1)) @test system.cutoff == 10.0 @test system.list == fill(zero(ComplexMixtures.MinimumDistance), 181) @test system.output == fill(zero(ComplexMixtures.MinimumDistance), 181) @@ -217,7 +216,7 @@ end # Auto-correlation traj = Trajectory("$(Testing.data_dir)/NAMD/trajectory.dcd", tmao) tmeta = ComplexMixtures.TrajectoryMetaData(traj, options) - system = ComplexMixtures.ParticleSystem(traj, tmeta.unitcell, options) + system = ComplexMixtures.ParticleSystem(traj, tmeta.unitcell, options, false, (1,1)) @test system.cutoff == 10.0 @test system.list == fill(zero(ComplexMixtures.MinimumDistance), 181) # one molecule less @test system.output == fill(zero(ComplexMixtures.MinimumDistance), 181) diff --git a/test/allocations.jl b/test/allocations.jl index 63d7a041f..5de233a95 100644 --- a/test/allocations.jl +++ b/test/allocations.jl @@ -55,14 +55,13 @@ @test t_RNG.allocs < 5 tmeta = ComplexMixtures.TrajectoryMetaData(traj, options) - system = ComplexMixtures.ParticleSystem(traj, tmeta.unitcell, options) + system = ComplexMixtures.ParticleSystem(traj, tmeta.unitcell, options, false, (1, 1)) buff = ComplexMixtures.Buffer(traj, R) @. buff.solute_read = traj.x_solute @. buff.solvent_read = traj.x_solvent ComplexMixtures.update_unitcell!(system, ComplexMixtures.convert_unitcell(ComplexMixtures.getunitcell(traj))) - r_chunk = ComplexMixtures.ResultChunk(R, nothing) - t_mddf_frame = - @benchmark ComplexMixtures.mddf_frame!($r_chunk, $system, $buff, $options, 1.0, $RNG) samples = 1 evals = 1 + t_mddf_frame = + @benchmark ComplexMixtures.mddf_frame!($R, $system, $buff, $options, 1.0, $RNG) samples = 1 evals = 1 @test t_mddf_frame.allocs < 100 end From 4bc63c646db83b4739d2fa7805809341eee215bd Mon Sep 17 00:00:00 2001 From: Leandro Martinez Date: Wed, 26 Jun 2024 11:43:24 -0300 Subject: [PATCH 10/10] add compat note --- src/mddf.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mddf.jl b/src/mddf.jl index 8da7b0616..7f320c6ab 100644 --- a/src/mddf.jl +++ b/src/mddf.jl @@ -138,6 +138,9 @@ The `low_memory` can be set to `true` to reduce the memory requirements of the c parallelize the computation of the minimum distances at a lower level, reducing the memory requirements at the expense of some performance. +!!! compat + The `low_memory` option was introduced in v2.4.0. + ### Examples ```julia-repl
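# Illustrative sketch only, not part of the original example in this patch series:
# it shows how the new `low_memory` keyword introduced here can be passed to `mddf`.
# The trajectory file, `solute`/`solvent` selections, and `bulk_range` values are
# placeholders.
julia> trajectory = Trajectory("./trajectory.dcd", solute, solvent);

julia> results_low_memory = mddf(trajectory, Options(bulk_range=(8.0, 12.0)); low_memory = true);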