Ekumen-OS · nahueespinosa · Nov 14, 2022 · Nov 6, 2022 · Nov 7, 2022 · Nov 11, 2022
diff --git a/beluga/CMakeLists.txt b/beluga/CMakeLists.txt
@@ -4,7 +4,7 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR})
 list(APPEND CMAKE_PREFIX_PATH ${CMAKE_CURRENT_BINARY_DIR})
 
 conan_cmake_configure(
-  REQUIRES   range-v3/0.12.0
+  REQUIRES   abseil/20220623.0 range-v3/0.12.0
   GENERATORS cmake_find_package)
 
 conan_cmake_autodetect(settings)

diff --git a/beluga/core/CMakeLists.txt b/beluga/core/CMakeLists.txt
@@ -1,12 +1,14 @@
 find_package(range-v3)
+find_package(absl)
 
 add_library(core INTERFACE)
 target_include_directories(core INTERFACE include)
-target_link_libraries(core INTERFACE range-v3::range-v3)
+target_link_libraries(core INTERFACE absl::absl range-v3::range-v3)
 target_compile_features(core INTERFACE cxx_std_17)
 
 add_executable(core_benchmark)
 target_sources(core_benchmark PRIVATE
+  benchmark/sampling_benchmark.cpp
   benchmark/tuple_vector_benchmark.cpp
   benchmark/spatial_hash_benchmark.cpp)
 target_link_libraries(core_benchmark PRIVATE benchmark_main core)
@@ -15,6 +17,7 @@ target_compile_options(core_benchmark PRIVATE -Wall -Werror)
 add_executable(core_test)
 target_sources(core_test PRIVATE
   test/particle_filter_test.cpp
+  test/sampling_test.cpp
   test/tuple_vector_test.cpp
   test/spatial_hash_test.cpp)
 target_link_libraries(core_test PRIVATE gmock_main core)

diff --git a/beluga/core/benchmark/sampling_benchmark.cpp b/beluga/core/benchmark/sampling_benchmark.cpp
@@ -0,0 +1,100 @@
+#include <benchmark/benchmark.h>
+
+#include <range/v3/algorithm.hpp>
+#include <range/v3/view.hpp>
+
+#include <beluga/algorithm/sampling.h>
+#include <beluga/tuple_vector.h>
+#include <beluga/type_traits.h>
+
+namespace {
+
+struct State {
+  double x = 0.;
+  double y = 0.;
+  double theta = 0.;
+};
+
+}  // namespace
+
+template <>
+struct beluga::spatial_hash<State> {
+  std::size_t operator()(const State& state, double resolution = 1.) const {
+    const auto pair = std::make_tuple(state.x, state.y);
+    return beluga::spatial_hash<std::decay_t<decltype(pair)>>{}(pair, resolution);
+  }
+};
+
+namespace {
+
+using Particle = std::tuple<State, double, std::size_t>;
+using Container = beluga::TupleVector<Particle>;
+
+void BM_FixedResample(benchmark::State& state) {
+  const std::size_t particle_count = state.range(0);
+  state.SetComplexityN(particle_count);
+
+  auto container = Container{particle_count};
+  auto new_container = Container{particle_count};
+  auto generator = std::mt19937{std::random_device()()};
+
+  for (auto&& [state, weight, cluster] : beluga::views::all(container)) {
+    weight = 1.;
+  }
+
+  std::size_t min_samples = particle_count;
+
+  for (auto _ : state) {
+    auto&& samples = ranges::views::generate(beluga::random_sample(
+                         beluga::views::all(container), beluga::views::weights(container), generator)) |
+                     ranges::views::take_exactly(min_samples) | ranges::views::common;
+    auto first = std::begin(beluga::views::all(new_container));
+    auto last = std::copy(std::begin(samples), std::end(samples), first);
+    state.counters["SampleSize"] = std::distance(first, last);
+  }
+}
+
+BENCHMARK(BM_FixedResample)->MinWarmUpTime(1)->RangeMultiplier(2)->Range(128, 1'000'000)->Complexity();
+
+void BM_AdaptiveResample(benchmark::State& state) {
+  const std::size_t particle_count = state.range(0);
+  state.SetComplexityN(particle_count);
+
+  auto container = Container{particle_count};
+  auto new_container = Container{particle_count};
+  auto generator = std::mt19937{std::random_device()()};
+
+  double step = 0;
+  int i = 0;
+  for (auto&& [state, weight, cluster] : beluga::views::all(container)) {
+    weight = 1.;
+    state.x = step;
+    if (i++ % 2 == 0) {
+      step += 0.05;
+    }
+  }
+
+  std::size_t min_samples = 0;
+  std::size_t max_samples = particle_count;
+  double resolution = 1.;
+  double kld_epsilon = 0.05;
+  double kld_upper_quantile = 0.95;
+
+  for (auto _ : state) {
+    auto&& samples =
+        ranges::views::generate(
+            beluga::random_sample(beluga::views::all(container), beluga::views::weights(container), generator)) |
+        ranges::views::transform(beluga::set_cluster(resolution)) |
+        ranges::views::take_while(
+            beluga::kld_condition(min_samples, kld_epsilon, kld_upper_quantile), beluga::cluster<const Particle&>) |
+        ranges::views::take(max_samples) | ranges::views::common;
+    auto first = std::begin(beluga::views::all(new_container));
+    auto last = std::copy(std::begin(samples), std::end(samples), first);
+    state.counters["SampleSize"] = std::distance(first, last);
+    state.counters["Percentage"] = static_cast<double>(std::distance(first, last)) / particle_count;
+  }
+}
+
+BENCHMARK(BM_AdaptiveResample)->MinWarmUpTime(1)->RangeMultiplier(2)->Range(128, 1'000'000)->Complexity();
+
+}  // namespace
diff --git a/beluga/core/include/beluga/algorithm.h b/beluga/core/include/beluga/algorithm.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#include <beluga/algorithm/sampling.h>
diff --git a/beluga/core/include/beluga/algorithm/sampling.h b/beluga/core/include/beluga/algorithm/sampling.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <functional>
+#include <random>
+#include <unordered_set>
+
+#include <absl/random/discrete_distribution.h>
+#include <beluga/spatial_hash.h>
+#include <beluga/type_traits.h>
+
+namespace beluga {
+
+template <class Function1, class Function2, class RandomNumberGenerator>
+auto random_select(Function1&& first, Function2&& second, RandomNumberGenerator& generator, double probability) {
+  return [&, distribution = std::bernoulli_distribution{probability}]() mutable {
+    return distribution(generator) ? first() : second();
+  };
+}
+
+template <class Range, class Weights, class RandomNumberGenerator>
+auto random_sample(const Range& samples, const Weights& weights, RandomNumberGenerator& generator) {
+  return [&generator, first = std::begin(samples),
+          distribution = absl::discrete_distribution<std::size_t>{std::begin(weights), std::end(weights)}]() mutable {
+    return *(first + distribution(generator));
+  };
+}
+
+inline auto set_cluster(double resolution) {
+  return [resolution](auto&& particle) {
+    using state_type = std::decay_t<decltype(state(particle))>;
+    auto new_particle = particle;
+    cluster(new_particle) = spatial_hash<state_type>{}(state(particle), resolution);
+    return new_particle;
+  };
+}
+
+inline auto kld_condition(std::size_t min, double epsilon = 0.05, double upper_quantile = 0.95) {
+  // Compute minimum number of samples based on a Kullback-Leibler distance epsilon
+  // between the maximum likelihood estimate and the true distribution.
+  auto target_size = [upper_quantile, epsilon](std::size_t k) {
+    double common = 2. / (9 * (k - 1));
+    double base = 1. - common - std::sqrt(common) * upper_quantile;
+    double result = ((k - 1) / epsilon) * base * base * base;
+    return static_cast<std::size_t>(std::ceil(result));
+  };
+
+  return [=, count = 0ULL, buckets = std::unordered_set<std::size_t>{}](std::size_t hash) mutable {
+    count++;
+    buckets.insert(hash);
+    auto cluster_count = buckets.size();
+    bool target_not_reached = cluster_count <= 2U ? true : count < target_size(cluster_count);
+    return count < min || target_not_reached;
+  };
+}
+
+}  // namespace beluga
diff --git a/beluga/core/include/beluga/type_traits/particle_traits.h b/beluga/core/include/beluga/type_traits/particle_traits.h
@@ -34,6 +34,7 @@ struct particle_traits {};
 
 template <template <class...> class Pair, class State, class... Extra>
 struct particle_traits<Pair<State, double, Extra...>> {
+  using state_type = State;
   using value_type = Pair<State, double>;
 
   template <class T>
@@ -53,6 +54,7 @@ struct particle_traits<Pair<State, double, Extra...>> {
 
 template <template <class...> class Tuple, class State, class... Extra>
 struct particle_traits<Tuple<State, double, std::size_t, Extra...>> {
+  using state_type = State;
   using value_type = Tuple<State, double, std::size_t>;
 
   template <class T>
@@ -129,4 +131,11 @@ constexpr auto clusters(Container&& container) {
 
 }  // namespace views
 
+template <class Particle, class T = typename particle_traits<Particle>::state_type>
+constexpr auto make_from_state(T&& value) {
+  auto particle = Particle{};
+  state(particle) = std::forward<T>(value);
+  return particle;
+}
+
 }  // namespace beluga
diff --git a/beluga/core/test/sampling_test.cpp b/beluga/core/test/sampling_test.cpp
@@ -0,0 +1,94 @@
+#include <gtest/gtest.h>
+
+#include <range/v3/view.hpp>
+
+#include <beluga/algorithm/sampling.h>
+
+namespace {
+
+class RandomSelectWithParam : public ::testing::TestWithParam<double> {};
+
+TEST_P(RandomSelectWithParam, Functional) {
+  const double probability = GetParam();
+  auto generator = std::mt19937{std::random_device()()};
+  auto output =
+      ranges::views::generate(beluga::random_select([]() { return 1; }, []() { return 2; }, generator, probability)) |
+      ranges::views::take_exactly(1'000'000);
+
+  std::size_t one_count = 0;
+  std::size_t two_count = 0;
+  for (auto&& value : output) {
+    if (value == 1) {
+      ++one_count;
+    }
+    if (value == 2) {
+      ++two_count;
+    }
+  }
+  ASSERT_NEAR(probability, static_cast<double>(one_count) / (one_count + two_count), 0.01);
+}
+
+INSTANTIATE_TEST_SUITE_P(Probabilities, RandomSelectWithParam, testing::Values(0.0, 0.2, 0.4, 0.6, 0.8, 1.0));
+
+TEST(RandomSample, Functional) {
+  const std::size_t samples = 1'000'000;
+  auto values = std::array<int, 4>{1, 2, 3, 4};
+  auto weights = std::array<double, 4>{0.1, 0.4, 0.3, 0.2};
+  auto generator = std::mt19937{std::random_device()()};
+  auto output =
+      ranges::views::generate(beluga::random_sample(values, weights, generator)) | ranges::views::take_exactly(samples);
+
+  auto counters = std::array<std::size_t, 4>{0, 0, 0, 0};
+  for (auto&& value : output) {
+    auto* it = std::find(std::begin(values), std::end(values), value);
+    ASSERT_NE(it, std::end(values)) << "Random sample returned an unknown value.";
+    auto& counter = *(std::begin(counters) + std::distance(std::begin(values), it));
+    ++counter;
+  }
+
+  for (std::size_t i = 0; i < counters.size(); ++i) {
+    ASSERT_NEAR(weights[i], static_cast<double>(counters[i]) / samples, 0.01);
+  }
+}
+
+class KLDConditionWithParam : public ::testing::TestWithParam<std::pair<std::size_t, std::size_t>> {};
+
+TEST_P(KLDConditionWithParam, Minimum) {
+  const std::size_t cluster_count = GetParam().first;
+  const std::size_t fixed_min_samples = 1'000;
+  auto predicate = beluga::kld_condition(fixed_min_samples);
+  std::size_t cluster = 0;
+  for (std::size_t i = 0; i < fixed_min_samples - 1; ++i) {
+    ASSERT_TRUE(predicate(cluster)) << "Stopped at " << i + 1 << " samples (Expected: " << fixed_min_samples << ").";
+    if (cluster < cluster_count - 1) {
+      ++cluster;
+    }
+  }
+}
+
+TEST_P(KLDConditionWithParam, Limit) {
+  const std::size_t cluster_count = GetParam().first;
+  const std::size_t min_samples = GetParam().second;
+  auto predicate = beluga::kld_condition(0, 0.05, 0.95);
+  std::size_t cluster = 0;
+  for (std::size_t i = 0; i < min_samples - 1; ++i) {
+    ASSERT_TRUE(predicate(cluster)) << "Stopped at " << i + 1 << " samples (Expected: " << min_samples << ").";
+    if (cluster < cluster_count - 1) {
+      ++cluster;
+    }
+  }
+  ASSERT_FALSE(predicate(cluster)) << "Didn't stop at " << min_samples << " samples.";
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    KLDPairs,
+    KLDConditionWithParam,
+    testing::Values(
+        std::make_pair(3, 8),
+        std::make_pair(4, 18),
+        std::make_pair(5, 30),
+        std::make_pair(6, 44),
+        std::make_pair(7, 57),
+        std::make_pair(100, 1'713)));
+
+}  // namespace
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#pragma once

		#include <beluga/algorithm/sampling.h>