proteneer · badisa · Jun 10, 2024 · Jun 6, 2024 · Jun 6, 2024 · Jun 6, 2024
diff --git a/tests/data/zero_overlap_ukln.npz b/tests/data/zero_overlap_ukln.npz
diff --git a/tests/test_bar.py b/tests/test_bar.py
@@ -1,4 +1,5 @@
 from functools import partial
+from pathlib import Path
 from typing import Tuple
 
 import numpy as np
@@ -83,9 +84,15 @@ def test_bootstrap_bar(sigma):
 
     # estimate 3 times
     df_ref, df_err_ref = df_and_err_from_u_kln(u_kln)
-    df_0, bootstrap_samples = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap)
+    df_0, ddf_0, bootstrap_samples = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap)
     df_1, bootstrap_sigma = bar_with_bootstrapped_uncertainty(u_kln)
 
+    # Full errors should match exactly
+    assert df_err_ref == ddf_0
+
+    # The bootstrapped error should be as large or larger than the full error
+    assert bootstrap_sigma >= df_err_ref
+
     # assert estimates identical, uncertainties comparable
     print(f"bootstrap uncertainty = {bootstrap_sigma}, pymbar.MBAR uncertainty = {df_err_ref}")
     assert df_0 == df_ref
@@ -216,3 +223,15 @@ def test_compute_fwd_and_reverse_df_over_time(frames_per_step):
     # The values at the end should be nearly identical since they contain all the samples
     assert np.allclose(fwd[-1], rev[-1])
     assert np.allclose(fwd_err[-1], rev_err[-1])
+
+
+def test_bootstrap_bar_and_regular_bar_match():
+    """With effectively no overlap in u_kln the bootstrapped error is 0.0 (the BAR estimate is always zero),
+    so bar_with_bootstrapped_uncertainty must report the same df and error as df_and_err_from_u_kln."""
+    test_ukln = Path(__file__).parent / "data" / "zero_overlap_ukln.npz"
+    with np.load(test_ukln) as npz:  # context manager closes the archive's file handle deterministically
+        u_kln = npz["u_kln"]
+    boot_df, boot_df_err = bar_with_bootstrapped_uncertainty(u_kln)
+    df, df_err = df_and_err_from_u_kln(u_kln)
+    assert boot_df == df
+    np.testing.assert_allclose(boot_df_err, df_err)
diff --git a/timemachine/fe/bar.py b/timemachine/fe/bar.py
@@ -151,7 +151,7 @@ def df_from_u_kln(
 
 def bootstrap_bar(
     u_kln: NDArray, n_bootstrap: int = 100, maximum_iterations: int = DEFAULT_MAXIMUM_ITERATIONS
-) -> Tuple[float, NDArray]:
+) -> Tuple[float, float, NDArray]:
     """Given a 2-state u_kln matrix, subsample u_kln with replacement and re-run df_from_u_kln many times
 
     Parameters
@@ -166,8 +166,12 @@ def bootstrap_bar(
     Returns
     -------
     best_estimate : float
-        BAR(w_F, w_R, computeUncertainty=False)
-    bootstrap_samples: array
+        BAR(w_F, w_R)
+
+    best_estimate_err : float
+        pymbar's uncertainty estimate for best_estimate, computed from the full (un-resampled) u_kln
+
+    bootstrap_samples : array
         shape (n_bootstrap,)
 
     Notes
@@ -178,7 +182,9 @@ def bootstrap_bar(
     u_kn, N_k = ukln_to_ukn(u_kln)
     mbar = pymbar.MBAR(u_kn, N_k, maximum_iterations=maximum_iterations)
 
-    full_bar_result = mbar.getFreeEnergyDifferences(compute_uncertainty=False)[0][0, 1]
+    df, ddf = mbar.getFreeEnergyDifferences()
+    full_bar_result = df[0, 1]
+    full_bar_err = ddf[0, 1]
 
     _, _, n = u_kln.shape
 
@@ -196,24 +202,24 @@ def bootstrap_bar(
         )
         bootstrap_samples.append(bar_result)
 
-    return full_bar_result, np.array(bootstrap_samples)
+    return full_bar_result, full_bar_err, np.array(bootstrap_samples)
 
 
 def bar_with_bootstrapped_uncertainty(
     u_kln: NDArray, n_bootstrap=100, maximum_iterations: int = DEFAULT_MAXIMUM_ITERATIONS
 ) -> Tuple[float, float]:
     """Given 2-state u_kln, returns free energy difference and uncertainty computed by bootstrapping."""
 
-    df, bootstrap_dfs = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap, maximum_iterations=maximum_iterations)
+    df, ddf, bootstrap_dfs = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap, maximum_iterations=maximum_iterations)
 
     # warn if bootstrap distribution deviates significantly from normality
     normaltest_result = normaltest(bootstrap_dfs)
     pvalue_threshold = 1e-3  # arbitrary, small
     if normaltest_result.pvalue < pvalue_threshold:
         logger.warning(f"bootstrapped errors non-normal: {normaltest_result}")
 
-    # regardless, summarize as if normal
-    ddf = np.std(bootstrap_dfs)
+    # Summarize bootstrap samples as if normal regardless; report the larger of that std and the full-data error
+    ddf = max(ddf, np.std(bootstrap_dfs))
     return df, ddf