Merge pull request #388 from 21cmfast/inverse_table_improvements

Inverse table improvements
21cmfast · May 17, 2024 · 62dff49 · 62dff49
2 parents cfe734f + d518903
commit 62dff49
Show file tree

Hide file tree

Showing 14 changed files with 1,063 additions and 678 deletions.
diff --git a/src/py21cmfast/inputs.py b/src/py21cmfast/inputs.py
@@ -290,31 +290,6 @@ class GlobalParams(StructInstanceWrapper):
         The peak gas temperatures behind the supersonic ionization fronts during reionization.
     VAVG:
         Avg value of the DM-b relative velocity [im km/s], ~0.9*SIGMAVCB (=25.86 km/s) normally.
-    STOC_MASS_TOL:
-        Mass tolerance for the stochastic halo sampling, a sample will be rejected if its mass sum falls
-        outside of the expected mass * (1 +- STOC_MASS_TOL)
-    SAMPLER_MIN_MASS:
-        Sets the minimum mass used by the halo sampler, halos below this mass will have the average contriubution
-        based on the integral of the given CMF. Greatly affects performance and memory usage.
-    MAXHALO_FACTOR:
-        Safety factor to multiply halo array sizes, total (all threads) array size will be UMF integral * MAXHALO_FACTOR
-    N_MASS_INTERP:
-        Number of mass bins for the sampler interpolation tables.
-    N_DELTA_INTERP:
-        Number of delta bins for the sampler interpolation tables.
-    N_PROB_INTERP:
-        Number of probability bins for the sampler interpolation tables.
-    MIN_LOGPROB:
-        Lower limit (in log probability) of the inverse CDF table.
-    SAMPLE_METHOD:
-        Sampling method to use for stochastic halos:
-            0: Mass sampling from CMF
-            1: N_halo sampling from CMF
-            2: Sheth & Lemson 99 Fcoll sampling
-            3: Darkforest (Qiu+20) binary splitting
-    AVG_BELOW_SAMPLER:
-        int flag to add the average halo proeprties in each cell between the source mass limit
-        and the mass sampled by the halo sampler
     """
 
     def __init__(self, wrapped, ffi):
@@ -510,11 +485,6 @@ class UserParams(StructWithDefaults):
         0: GSL QAG adaptive integration,
         1: Gauss-Legendre integration, previously forced in the interpolation tables,
         2: Approximate integration, assuming sharp cutoffs and a triple power-law for sigma(M) based on EPS
-    INTEGRATION_METHOD_HALOS: int, optional
-        The integration method to use for all conditional MF integrals for halos from the sampler catalogues:
-        0: GSL QAG adaptive integration,
-        1: Gauss-Legendre integration, previously forced in the interpolation tables,
-        2: Approximate integration, assuming sharp cutoffs and a triple power-law for sigma(M) based on EPS
     USE_2LPT: bool, optional
         Whether to use second-order Lagrangian perturbation theory (2LPT).
         Set this to True if the density field or the halo positions are extrapolated to
@@ -532,6 +502,40 @@ class UserParams(StructWithDefaults):
         If STOC_MINIMUM_Z is not provided, we simply sample at the given redshift, unless
         USE_TS_FLUCT is given or we want a lightcone, in which case the minimum redshift is set
         to the minimum redshift of those fields.
+    SAMPLER_MIN_MASS: float, optional
+        The minimum mass to sample in the halo sampler when USE_HALO_FIELD and HALO_STOCHASTICITY are true,
+        decreasing this can drastically increase both compute time and memory usage.
+    SAMPLER_BUFFER_FACTOR: float, optional
+        The arrays for the halo sampler will have size of SAMPLER_BUFFER_FACTOR multiplied by the expected
+        number of halos in the box. Ideally this should be close to unity but one may wish to increase it to
+        test alternative scenarios
+    N_COND_INTERP: int, optional
+        The number of condition bins in the inverse CMF tables.
+    N_PROB_INTERP: int, optional
+        The number of probability bins in the inverse CMF tables.
+    MIN_LOGPROB: float, optional
+        The minimum log-probability of the inverse CMF tables.
+    SAMPLE_METHOD: int, optional
+        The sampling method to use in the halo sampler when calculating progenitor populations:
+        0: Mass-limited CMF sampling, where samples are drawn until the expected mass is reached
+        1: Number-limited CMF sampling, where we select a number of halos from the Poisson distribution
+        and then sample the CMF that many times
+        2: Sheth et al 1999 Partition sampling, where the EPS collapsed fraction is sampled (gaussian tail)
+        and then the condition is updated using the conservation of mass.
+        3: Parkinsson et al 2008 Binary split model as in DarkForest (Qiu et al 2021) where the EPS merger rate
+        is sampled on small internal timesteps such that only binary splits can occur.
+        NOTE: Sampling from the density grid will ALWAYS use number-limited sampling (method 1)
+    AVG_BELOW_SAMPLER: bool, optional
+        When switched on, an integral is performed in each cell between the minimum source mass and SAMPLER_MIN_MASS,
+        effectively placing the average halo population in each HaloBox cell below the sampler resolution.
+        When switched off, all halos below SAMPLER_MIN_MASS are ignored. This flag saves memory for larger boxes,
+        while still including the effects of smaller sources, albeit without stochasticity.
+    HALOMASS_CORRECTION: float, optional
+        This provides a corrective factor to the mass-limited (SAMPLE_METHOD==0) sampling, which multiplies the
+        expected mass from a condition by this number. The default value of 0.9 is calibrated to the mass-limited
+        sampling on a timestep of ZPRIME_STEP_FACTOR=1.02.
+        If ZPRIME_STEP_FACTOR is increased, this value should be set closer to 1.
+        This factor is also used in the partition (SAMPLE_METHOD==2) sampler, dividing nu(M) of each sample drawn.
     """
 
     _ffi = ffi
@@ -551,15 +555,25 @@ class UserParams(StructWithDefaults):
         "USE_INTERPOLATION_TABLES": None,
         "INTEGRATION_METHOD_ATOMIC": 1,
         "INTEGRATION_METHOD_MINI": 1,
-        "INTEGRATION_METHOD_HALOS": 0,
         "USE_2LPT": True,
         "MINIMIZE_MEMORY": False,
         "STOC_MINIMUM_Z": None,
         "KEEP_3D_VELOCITIES": False,
+        "SAMPLER_MIN_MASS": 5e7,
+        "SAMPLER_BUFFER_FACTOR": 2.0,
+        "MAXHALO_FACTOR": 2.0,
+        "N_COND_INTERP": 200,
+        "N_PROB_INTERP": 400,
+        "MIN_LOGPROB": -12,
+        "SAMPLE_METHOD": 0,
+        "AVG_BELOW_SAMPLER": False,
+        "HALOMASS_CORRECTION": 0.9,
     }
 
     _hmf_models = ["PS", "ST", "WATSON", "WATSON-Z", "DELOS"]
     _power_models = ["EH", "BBKS", "EFSTATHIOU", "PEEBLES", "WHITE", "CLASS"]
+    _sample_methods = ["MASS-LIMITED", "NUMBER-LIMITED", "PARTITION", "BINARY-SPLIT"]
+    _integral_methods = ["GSL-QAG", "GAUSS-LEGENDRE", "GAMMA-APPROX"]
 
     @property
     def USE_INTERPOLATION_TABLES(self):
@@ -603,6 +617,38 @@ def HII_tot_num_pixels(self):
         """Total number of pixels in the low-res box."""
         return self.NON_CUBIC_FACTOR * self.HII_DIM**3
 
+    def _get_enum_property(self, prop, enum_list, propname=""):
+        """
+        Retrieve a value for a property with defined enum list (see UserParams._power_models etc.).
+
+        Arguments
+        ---------
+        prop: the hidden attribute to find in the enum list
+        enum_list: the list of parameter value strings
+        propname: the name of the property (for error messages)
+
+        Returns
+        -------
+        The index of prop within the parameter list, corresponding to it's integer value in
+        the C backend
+        """
+        # if it's a string we grab the index of the list
+        if isinstance(prop, str):
+            val = enum_list.index(prop.upper())
+        # otherwise it's a number so we leave it alone
+        else:
+            val = prop
+
+        try:
+            val = int(val)
+        except (ValueError, TypeError) as e:
+            raise ValueError(f"{val} is an invalid value for {propname}") from e
+
+        if not 0 <= val < len(enum_list):
+            raise ValueError(f"HMF must be an int between 0 and {len(enum_list) - 1}")
+
+        return val
+
     @property
     def POWER_SPECTRUM(self):
         """
@@ -622,40 +668,51 @@ def POWER_SPECTRUM(self):
                 self._POWER_SPECTRUM = 5
             return 5
         else:
-            if isinstance(self._POWER_SPECTRUM, str):
-                val = self._power_models.index(self._POWER_SPECTRUM.upper())
-            else:
-                val = self._POWER_SPECTRUM
-
-            if not 0 <= val < len(self._power_models):
-                raise ValueError(
-                    f"Power spectrum must be between 0 and {len(self._power_models) - 1}"
-                )
-
-            return val
+            return self._get_enum_property(
+                self._POWER_SPECTRUM, self._power_models, propname="POWER_SPECTRUM"
+            )
 
     @property
     def HMF(self):
         """The HMF to use (an int, mapping to a given form).
 
         See hmf_model for a string representation.
         """
-        if isinstance(self._HMF, str):
-            val = self._hmf_models.index(self._HMF.upper())
-        else:
-            val = self._HMF
+        return self._get_enum_property(self._HMF, self._hmf_models, propname="HMF")
 
-        try:
-            val = int(val)
-        except (ValueError, TypeError) as e:
-            raise ValueError("Invalid value for HMF") from e
+    @property
+    def SAMPLE_METHOD(self):
+        """The SAMPLE_METHOD to use (an int, mapping to a given form).
 
-        if not 0 <= val < len(self._hmf_models):
-            raise ValueError(
-                f"HMF must be an int between 0 and {len(self._hmf_models) - 1}"
-            )
+        See sample_method for a string representation.
+        """
+        return self._get_enum_property(
+            self._SAMPLE_METHOD, self._sample_methods, propname="SAMPLE_METHOD"
+        )
 
-        return val
+    @property
+    def INTEGRATION_METHOD_ATOMIC(self):
+        """The Integration method to use for atomic halos (an int, mapping to a given form).
+
+        See integral_method_atomic for a string representation.
+        """
+        return self._get_enum_property(
+            self._INTEGRATION_METHOD_ATOMIC,
+            self._integral_methods,
+            propname="INTEGRATION_METHOD_ATOMIC",
+        )
+
+    @property
+    def INTEGRATION_METHOD_MINI(self):
+        """The Integration method to use for atomic halos (an int, mapping to a given form).
+
+        See integral_method_atomic for a string representation.
+        """
+        return self._get_enum_property(
+            self._INTEGRATION_METHOD_MINI,
+            self._integral_methods,
+            propname="INTEGRATION_METHOD_MINI",
+        )
 
     @property
     def hmf_model(self):
@@ -668,13 +725,19 @@ def power_spectrum_model(self):
         return self._power_models[self.POWER_SPECTRUM]
 
     @property
-    def INTEGRATION_METHOD_HALOS(self):
-        """The integration methods other than QAG do not yet work for halos."""
-        if self._INTEGRATION_METHOD_HALOS != 0:
-            warnings.warn(
-                "Only the QAG integrator currently works for the halo sampler, setting to 1 or 2 is for testing only"
-            )
-        return self._INTEGRATION_METHOD_HALOS
+    def sample_method(self):
+        """String representation of the halo sampler method used."""
+        return self._sample_methods[self.SAMPLE_METHOD]
+
+    @property
+    def integration_method_atomic(self):
+        """String representation of the halo sampler method used."""
+        return self._integral_methods[self.INTEGRATION_METHOD_ATOMIC]
+
+    @property
+    def integration_method_mini(self):
+        """String representation of the halo sampler method used."""
+        return self._intregal_methods[self.INTEGRATION_METHOD_MINI]
 
     @property
     def cell_size(self) -> un.Quantity[un.Mpc]:

diff --git a/src/py21cmfast/src/21cmFAST.h b/src/py21cmfast/src/21cmFAST.h
@@ -31,10 +31,20 @@ struct UserParams{
     bool USE_INTERPOLATION_TABLES;
     int INTEGRATION_METHOD_ATOMIC;
     int INTEGRATION_METHOD_MINI;
-    int INTEGRATION_METHOD_HALOS;
     bool USE_2LPT;
     bool MINIMIZE_MEMORY;
     bool KEEP_3D_VELOCITIES;
+
+    //Halo Sampler Options
+    float SAMPLER_MIN_MASS;
+    double SAMPLER_BUFFER_FACTOR;
+    float MAXHALO_FACTOR;
+    int N_COND_INTERP;
+    int N_PROB_INTERP;
+    double MIN_LOGPROB;
+    int SAMPLE_METHOD;
+    bool AVG_BELOW_SAMPLER;
+    double HALOMASS_CORRECTION;
 };
 
 struct AstroParams{
@@ -132,7 +142,7 @@ struct HaloBox{
     float *n_ion; //weighted by F_ESC*PopN_ion
     float *halo_sfr; //for x-rays and Ts stuff
     float *halo_sfr_mini; //for x-rays and Ts stuff
-    float *whalo_sfr;
+    float *whalo_sfr; //SFR weighted by PopN_ion and F_ESC, used for Gamma12
 
     double log10_Mcrit_LW_ave;
 };
@@ -301,6 +311,7 @@ void initialise_SFRD_Conditional_table(double min_density, double max_density, d
                                     float Fstar10, float Fstar7_MINI, int method, int method_mini, bool minihalos);
 
 void initialise_dNdM_tables(double xmin, double xmax, double ymin, double ymax, double growth1, double param, bool from_catalog);
+void initialise_dNdM_inverse_table(double xmin, double xmax, double lnM_min, double growth1, double param, bool from_catalog);
 
 //evaluation of tables
 double EvaluateNionTs(double redshift, double Mlim_Fstar, double Mlim_Fesc);

diff --git a/src/py21cmfast/src/FindHaloes.c b/src/py21cmfast/src/FindHaloes.c
@@ -106,11 +106,7 @@ LOG_DEBUG("redshift=%f", redshift);
                 .HMF = user_params->HMF,
             };
 
-            if(user_params->INTEGRATION_METHOD_HALOS == 1){
-                initialise_GL(NGL_INT,log(M_MIN),log(Mmax_debug));
-            }
-
-            double nhalo_debug = IntegratedNdM(log(M_MIN), log(Mmax_debug), params, 1, user_params->INTEGRATION_METHOD_HALOS) * VOLUME;
+            double nhalo_debug = IntegratedNdM(log(M_MIN), log(Mmax_debug), params, 1, 0) * VOLUME;
             //expected halos above minimum filter mass
             LOG_DEBUG("DexM: We expect %.2f Halos between Masses [%.2e,%.2e]",nhalo_debug,M_MIN,Mmax_debug);
         }
@@ -166,7 +162,7 @@ LOG_DEBUG("redshift=%f", redshift);
 
             //Pending a serious deep-dive into this algorithm, I will force DexM to use the fitted parameters to the
             //      Sheth-Tormen mass function (as of right now, We do not even reproduce EPS results)
-            delta_crit = growth_factor*sheth_delc(Deltac/growth_factor, sigma_z0(M));
+            delta_crit = growth_factor*sheth_delc_dexm(Deltac/growth_factor, sigma_z0(M));
 
             // if(global_params.DELTA_CRIT_MODE == 1){
             //     //This algorithm does not use the sheth tormen OR Jenkins parameters,

diff --git a/src/py21cmfast/src/Globals.h b/src/py21cmfast/src/Globals.h
@@ -81,17 +81,6 @@ struct GlobalParams{
 
     float VAVG;
 
-    //Stochasticity Options
-    float STOC_MASS_TOL;
-    float SAMPLER_MIN_MASS;
-    float MAXHALO_FACTOR;
-    int N_MASS_INTERP;
-    int N_COND_INTERP;
-    int N_PROB_INTERP;
-    double MIN_LOGPROB;
-    int SAMPLE_METHOD;
-    int AVG_BELOW_SAMPLER;
-
     bool USE_ADIABATIC_FLUCTUATIONS;
 };
 
@@ -153,10 +142,8 @@ extern struct GlobalParams global_params = {
     .OMtot = 1.0,
     .Y_He = 0.245,
     .wl = -1.0,
-    // .SHETH_b = 0.485, //In the literature this is 0.485 (RM08) or 0.5 (SMT01) Master 21cmFAST currently has 0.15
-    // .SHETH_c = 0.615, //In the literature this is 0.615 (RM08) or 0.6 (SMT01) Master 21cmFAST currently has 0.05
-    .SHETH_b = 0.15,
-    .SHETH_c = 0.05,
+    .SHETH_b = 0.15, //In the literature this is 0.485 (RM08) or 0.5 (SMT01) or 0.34 (Barkana+01) Master 21cmFAST currently has 0.15
+    .SHETH_c = 0.05, //In the literature this is 0.615 (RM08) or 0.6 (SMT01) or 0.81 (Barkana+01) Master 21cmFAST currently has 0.05
     .Zreion_HeII = 3.0,
     .FILTER = 0,
     .R_BUBBLE_MIN = 0.620350491,
@@ -167,15 +154,5 @@ extern struct GlobalParams global_params = {
 
     .VAVG=25.86,
 
-    .STOC_MASS_TOL = 5.0, //effectively infinite, mass tolerance semi-deprecated
-    .SAMPLER_MIN_MASS = 5e7,
-    .MAXHALO_FACTOR = 2,
-    .N_MASS_INTERP = 200,
-    .N_COND_INTERP = 200,
-    .N_PROB_INTERP = 200,
-    .MIN_LOGPROB = -16,
-    .SAMPLE_METHOD = 0,
-    .AVG_BELOW_SAMPLER = 0,
-
     .USE_ADIABATIC_FLUCTUATIONS = 1,
 };