Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sampling utilities and benchmarks #9

Merged
merged 8 commits into from
Nov 14, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion beluga/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR})
list(APPEND CMAKE_PREFIX_PATH ${CMAKE_CURRENT_BINARY_DIR})

conan_cmake_configure(
REQUIRES range-v3/0.12.0
REQUIRES abseil/20220623.0 range-v3/0.12.0
GENERATORS cmake_find_package)

conan_cmake_autodetect(settings)
Expand Down
5 changes: 4 additions & 1 deletion beluga/core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
find_package(range-v3)
find_package(absl)

add_library(core INTERFACE)
target_include_directories(core INTERFACE include)
target_link_libraries(core INTERFACE range-v3::range-v3)
target_link_libraries(core INTERFACE absl::absl range-v3::range-v3)
target_compile_features(core INTERFACE cxx_std_17)

add_executable(core_benchmark)
target_sources(core_benchmark PRIVATE
benchmark/sampling_benchmark.cpp
benchmark/tuple_vector_benchmark.cpp
benchmark/spatial_hash_benchmark.cpp)
target_link_libraries(core_benchmark PRIVATE benchmark_main core)
Expand All @@ -15,6 +17,7 @@ target_compile_options(core_benchmark PRIVATE -Wall -Werror)
add_executable(core_test)
target_sources(core_test PRIVATE
test/particle_filter_test.cpp
test/sampling_test.cpp
test/tuple_vector_test.cpp
test/spatial_hash_test.cpp)
target_link_libraries(core_test PRIVATE gmock_main core)
Expand Down
100 changes: 100 additions & 0 deletions beluga/core/benchmark/sampling_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include <benchmark/benchmark.h>

#include <range/v3/algorithm.hpp>
#include <range/v3/view.hpp>

#include <beluga/algorithm/sampling.h>
#include <beluga/tuple_vector.h>
#include <beluga/type_traits.h>

namespace {

struct State {
double x = 0.;
double y = 0.;
double theta = 0.;
};

} // namespace

template <>
struct beluga::spatial_hash<State> {
std::size_t operator()(const State& state, double resolution = 1.) const {
const auto pair = std::make_tuple(state.x, state.y);
return beluga::spatial_hash<std::decay_t<decltype(pair)>>{}(pair, resolution);
}
};

namespace {

using Particle = std::tuple<State, double, std::size_t>;
using Container = beluga::TupleVector<Particle>;

void BM_FixedResample(benchmark::State& state) {
const std::size_t particle_count = state.range(0);
state.SetComplexityN(particle_count);

auto container = Container{particle_count};
auto new_container = Container{particle_count};
auto generator = std::mt19937{std::random_device()()};

for (auto&& [state, weight, cluster] : beluga::views::all(container)) {
weight = 1.;
}

std::size_t min_samples = particle_count;

for (auto _ : state) {
auto&& samples = ranges::views::generate(beluga::random_sample(
beluga::views::all(container), beluga::views::weights(container), generator)) |
ranges::views::take_exactly(min_samples) | ranges::views::common;
auto first = std::begin(beluga::views::all(new_container));
auto last = std::copy(std::begin(samples), std::end(samples), first);
state.counters["SampleSize"] = std::distance(first, last);
}
}

BENCHMARK(BM_FixedResample)->MinWarmUpTime(1)->RangeMultiplier(2)->Range(128, 1'000'000)->Complexity();

void BM_AdaptiveResample(benchmark::State& state) {
const std::size_t particle_count = state.range(0);
state.SetComplexityN(particle_count);

auto container = Container{particle_count};
auto new_container = Container{particle_count};
auto generator = std::mt19937{std::random_device()()};

double step = 0;
int i = 0;
for (auto&& [state, weight, cluster] : beluga::views::all(container)) {
weight = 1.;
state.x = step;
if (i++ % 2 == 0) {
step += 0.05;
}
}

std::size_t min_samples = 0;
std::size_t max_samples = particle_count;
double resolution = 1.;
double kld_epsilon = 0.05;
double kld_upper_quantile = 0.95;

for (auto _ : state) {
auto&& samples =
ranges::views::generate(
beluga::random_sample(beluga::views::all(container), beluga::views::weights(container), generator)) |
ranges::views::transform(beluga::set_cluster(resolution)) |
ranges::views::take_while(
beluga::kld_condition(min_samples, kld_epsilon, kld_upper_quantile), beluga::cluster<const Particle&>) |
ranges::views::take(max_samples) | ranges::views::common;
auto first = std::begin(beluga::views::all(new_container));
auto last = std::copy(std::begin(samples), std::end(samples), first);
state.counters["SampleSize"] = std::distance(first, last);
state.counters["Percentage"] = static_cast<double>(std::distance(first, last)) / particle_count;
}
}

BENCHMARK(BM_AdaptiveResample)->MinWarmUpTime(1)->RangeMultiplier(2)->Range(128, 1'000'000)->Complexity();

} // namespace
3 changes: 3 additions & 0 deletions beluga/core/include/beluga/algorithm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#pragma once

#include <beluga/algorithm/sampling.h>
56 changes: 56 additions & 0 deletions beluga/core/include/beluga/algorithm/sampling.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#pragma once

#include <functional>
#include <random>
#include <unordered_set>

#include <absl/random/discrete_distribution.h>
#include <beluga/spatial_hash.h>
#include <beluga/type_traits.h>

namespace beluga {

template <class Function1, class Function2, class RandomNumberGenerator>
auto random_select(Function1&& first, Function2&& second, RandomNumberGenerator& generator, double probability) {
return [&, distribution = std::bernoulli_distribution{probability}]() mutable {
return distribution(generator) ? first() : second();
};
}

template <class Range, class Weights, class RandomNumberGenerator>
auto random_sample(const Range& samples, const Weights& weights, RandomNumberGenerator& generator) {
return [&generator, first = std::begin(samples),
distribution = absl::discrete_distribution<std::size_t>{std::begin(weights), std::end(weights)}]() mutable {
return *(first + distribution(generator));
};
}

inline auto set_cluster(double resolution) {
return [resolution](auto&& particle) {
using state_type = std::decay_t<decltype(state(particle))>;
auto new_particle = particle;
cluster(new_particle) = spatial_hash<state_type>{}(state(particle), resolution);
return new_particle;
};
}

inline auto kld_condition(std::size_t min, double epsilon = 0.05, double upper_quantile = 0.95) {
// Compute minimum number of samples based on a Kullback-Leibler distance epsilon
// between the maximum likelihood estimate and the true distribution.
auto target_size = [upper_quantile, epsilon](std::size_t k) {
double common = 2. / (9 * (k - 1));
double base = 1. - common - std::sqrt(common) * upper_quantile;
nahueespinosa marked this conversation as resolved.
Show resolved Hide resolved
double result = ((k - 1) / epsilon) * base * base * base;
nahueespinosa marked this conversation as resolved.
Show resolved Hide resolved
return static_cast<std::size_t>(std::ceil(result));
};

return [=, count = 0ULL, buckets = std::unordered_set<std::size_t>{}](std::size_t hash) mutable {
count++;
buckets.insert(hash);
auto cluster_count = buckets.size();
bool target_not_reached = cluster_count <= 2U ? true : count < target_size(cluster_count);
return count < min || target_not_reached;
};
}

} // namespace beluga
9 changes: 9 additions & 0 deletions beluga/core/include/beluga/type_traits/particle_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct particle_traits {};

template <template <class...> class Pair, class State, class... Extra>
struct particle_traits<Pair<State, double, Extra...>> {
using state_type = State;
using value_type = Pair<State, double>;

template <class T>
Expand All @@ -53,6 +54,7 @@ struct particle_traits<Pair<State, double, Extra...>> {

template <template <class...> class Tuple, class State, class... Extra>
struct particle_traits<Tuple<State, double, std::size_t, Extra...>> {
using state_type = State;
using value_type = Tuple<State, double, std::size_t>;

template <class T>
Expand Down Expand Up @@ -129,4 +131,11 @@ constexpr auto clusters(Container&& container) {

} // namespace views

template <class Particle, class T = typename particle_traits<Particle>::state_type>
constexpr auto make_from_state(T&& value) {
auto particle = Particle{};
state(particle) = std::forward<T>(value);
return particle;
}

} // namespace beluga
94 changes: 94 additions & 0 deletions beluga/core/test/sampling_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#include <gtest/gtest.h>

#include <range/v3/view.hpp>

#include <beluga/algorithm/sampling.h>

namespace {

class RandomSelectWithParam : public ::testing::TestWithParam<double> {};

TEST_P(RandomSelectWithParam, Functional) {
const double probability = GetParam();
auto generator = std::mt19937{std::random_device()()};
auto output =
ranges::views::generate(beluga::random_select([]() { return 1; }, []() { return 2; }, generator, probability)) |
ranges::views::take_exactly(1'000'000);

std::size_t one_count = 0;
std::size_t two_count = 0;
for (auto&& value : output) {
if (value == 1) {
++one_count;
}
if (value == 2) {
++two_count;
}
}
ASSERT_NEAR(probability, static_cast<double>(one_count) / (one_count + two_count), 0.01);
}

INSTANTIATE_TEST_SUITE_P(Probabilities, RandomSelectWithParam, testing::Values(0.0, 0.2, 0.4, 0.6, 0.8, 1.0));

TEST(RandomSample, Functional) {
const std::size_t samples = 1'000'000;
auto values = std::array<int, 4>{1, 2, 3, 4};
auto weights = std::array<double, 4>{0.1, 0.4, 0.3, 0.2};
auto generator = std::mt19937{std::random_device()()};
auto output =
ranges::views::generate(beluga::random_sample(values, weights, generator)) | ranges::views::take_exactly(samples);

auto counters = std::array<std::size_t, 4>{0, 0, 0, 0};
for (auto&& value : output) {
auto* it = std::find(std::begin(values), std::end(values), value);
ASSERT_NE(it, std::end(values)) << "Random sample returned an unknown value.";
auto& counter = *(std::begin(counters) + std::distance(std::begin(values), it));
++counter;
}

for (std::size_t i = 0; i < counters.size(); ++i) {
ASSERT_NEAR(weights[i], static_cast<double>(counters[i]) / samples, 0.01);
}
}

class KLDConditionWithParam : public ::testing::TestWithParam<std::pair<std::size_t, std::size_t>> {};

TEST_P(KLDConditionWithParam, Minimum) {
const std::size_t cluster_count = GetParam().first;
const std::size_t fixed_min_samples = 1'000;
auto predicate = beluga::kld_condition(fixed_min_samples);
std::size_t cluster = 0;
for (std::size_t i = 0; i < fixed_min_samples - 1; ++i) {
ASSERT_TRUE(predicate(cluster)) << "Stopped at " << i + 1 << " samples (Expected: " << fixed_min_samples << ").";
if (cluster < cluster_count - 1) {
++cluster;
}
}
}

TEST_P(KLDConditionWithParam, Limit) {
const std::size_t cluster_count = GetParam().first;
const std::size_t min_samples = GetParam().second;
auto predicate = beluga::kld_condition(0, 0.05, 0.95);
std::size_t cluster = 0;
for (std::size_t i = 0; i < min_samples - 1; ++i) {
ASSERT_TRUE(predicate(cluster)) << "Stopped at " << i + 1 << " samples (Expected: " << min_samples << ").";
if (cluster < cluster_count - 1) {
++cluster;
}
}
ASSERT_FALSE(predicate(cluster)) << "Didn't stop at " << min_samples << " samples.";
}

INSTANTIATE_TEST_SUITE_P(
KLDPairs,
KLDConditionWithParam,
testing::Values(
std::make_pair(3, 8),
std::make_pair(4, 18),
std::make_pair(5, 30),
std::make_pair(6, 44),
std::make_pair(7, 57),
std::make_pair(100, 1'713)));
nahueespinosa marked this conversation as resolved.
Show resolved Hide resolved

} // namespace