Skip to content

Commit

Permalink
Merge pull request #4143 from eggrobin/nanobenchmarks
Browse files Browse the repository at this point in the history
Nanobenchmarks
  • Loading branch information
eggrobin authored Dec 29, 2024
2 parents 355e57f + c969067 commit 4e38e35
Show file tree
Hide file tree
Showing 14 changed files with 1,055 additions and 29 deletions.
28 changes: 28 additions & 0 deletions Principia.sln
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testing_utilities", "shared
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "functions", "functions\functions.vcxproj", "{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nanobenchmarks", "nanobenchmarks\nanobenchmarks.vcxproj", "{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -693,6 +695,30 @@ Global
{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}.Release|x64.Build.0 = Release|x64
{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}.Release|x86.ActiveCfg = Release|Win32
{7CCA653C-2E8F-4FFD-9E9F-BEE590F3EFAB}.Release|x86.Build.0 = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|Any CPU.ActiveCfg = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|Any CPU.Build.0 = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x64.ActiveCfg = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x64.Build.0 = Debug|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x86.ActiveCfg = Debug|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Debug|x86.Build.0 = Debug|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|Any CPU.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|Any CPU.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x64.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x64.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x86.ActiveCfg = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release KSP 1.7.3|x86.Build.0 = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|Any CPU.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|Any CPU.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x64.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x64.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x86.ActiveCfg = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release_LLVM|x86.Build.0 = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|Any CPU.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|Any CPU.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x64.ActiveCfg = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x64.Build.0 = Release|x64
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x86.ActiveCfg = Release|Win32
{1C6654C0-14E2-4A9E-B0E6-508B84FA8A0E}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -715,6 +741,8 @@ Global
EndGlobalSection
GlobalSection(SharedMSBuildProjectFiles) = preSolution
shared\base.vcxitems*{0fd08cdf-228c-48c6-8690-cf0a72cf6c69}*SharedItemsImports = 4
shared\base.vcxitems*{1c6654c0-14e2-4a9e-b0e6-508b84fa8a0e}*SharedItemsImports = 4
shared\numerics.vcxitems*{1c6654c0-14e2-4a9e-b0e6-508b84fa8a0e}*SharedItemsImports = 4
shared\base.vcxitems*{273987f9-5e73-43e6-868e-e9d3c137f01a}*SharedItemsImports = 4
shared\numerics.vcxitems*{273987f9-5e73-43e6-868e-e9d3c137f01a}*SharedItemsImports = 4
shared\base.vcxitems*{2e28828e-8364-4962-a9ff-c20a72eb884c}*SharedItemsImports = 4
Expand Down
67 changes: 38 additions & 29 deletions documentation/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,7 @@ @article{Kutta1901
url = {https://archive.org/details/zeitschriftfrma12runggoog/page/435},
date = {1901-11},
journaltitle = {Zeitschrift für Mathematik und Physik},
pages = {435453},
pages = {435--453},
title = {Beitrag zur näherungsweisen Integration totaler Differentialgleichungen},
volume = {46},
}
Expand Down Expand Up @@ -1170,8 +1170,8 @@ @article{Newhall1989
volume = {45},
}

@article{NguyễnStehlé2009,
author = {Nguyễn, Phong Q. and Stehlé, Damien},
@article{NguyễnStehlé2009,
author = {Nguyễn, Phong Q. and Stehlé, Damien},
url = {https://doi.org/10.1137/070705702},
date = {2009},
doi = {10.1137/070705702},
Expand Down Expand Up @@ -1728,7 +1728,7 @@ @book{Meeus1998
}

@book{MullerBrisebarreDeDinechinJeannerodLefevreMelquiondRevolStehleTorres2010,
author = {Muller, Jean-Michel and Brisebarre, Nicolas and De Dinechin, Florent and Jeannerod, Claude-Pierre and Lefèvre, Vincent and Melquiond, Guillaume and Revol, Nathalie and Stehlé, Damien and Torres, Serge},
author = {Muller, Jean-Michel and Brisebarre, Nicolas and De Dinechin, Florent and Jeannerod, Claude-Pierre and Lefèvre, Vincent and Melquiond, Guillaume and Revol, Nathalie and Stehlé, Damien and Torres, Serge},
publisher = {Birkhäuser},
date = {2010},
isbn = {978-0-8176-4704-9},
Expand All @@ -1752,7 +1752,7 @@ @book{NistHMF2010
}

@book{NocedalWright2006,
author = {Nocedal, Jorge, and Wright, Stephen J.},
author = {Nocedal, Jorge and Wright, Stephen J.},
publisher = {Springer},
date = {2006},
isbn = {978-0387-30303-1},
Expand Down Expand Up @@ -1869,7 +1869,7 @@ @inbook{ZatloukalJohnsonLadner2002
booktitle = {Near Neighbor Searches, and Methodology: Fifth and Sixth DIMACS Implementation Challenges},
date = {2002},
isbn = {0821828924},
pages = {69-86},
pages = {69--86},
title = {Nearest Neighbor Search for Data Compression},
}

Expand Down Expand Up @@ -2074,32 +2074,32 @@ @inproceedings{SofroniouSpaletta2002
venue = {Amsterdam, The Netherlands},
}

@inproceedings{StehléZimmermann2005,
author = {Stehlé, Damien and Zimmermann, Paul},
editor = {Montuschi, Paolo and Schwarz, Eric},
publisher = {IEEE Computer Society},
booktitle = {17th IEEE Symposium on Computer Arithmetic (ARITH'05)},
date = {2005-06},
doi = {10.1109/ARITH.2005.24},
eventdate = {2005-06-27/2005-06-29},
isbn = {0-7695-2366-8},
pages = {257--264},
title = {Gal's accurate tables method revisited},
venue = {Cape Cod, MA, USA},
@inproceedings{StehléZimmermann2005,
author = {Stehlé, Damien and Zimmermann, Paul},
editor = {Montuschi, Paolo and Schwarz, Eric},
publisher = {IEEE Computer Society},
booktitle = {17th IEEE Symposium on Computer Arithmetic (ARITH'05)},
date = {2005-06},
doi = {10.1109/ARITH.2005.24},
eventdate = {2005-06-27/2005-06-29},
isbn = {0-7695-2366-8},
pages = {257--264},
title = {Gal's accurate tables method revisited},
venue = {Cape Cod, MA, USA},
}

@inproceedings{WuZhang1991,
author = {Wu, Xiaolin and Zhang, Kaizhong},
editor = {Storer, James A. and Reif, John H.},
publisher = {IEEE Computer Society},
booktitle = {1991 Data Compression Conference},
date = {1991-04},
doi = {10.1109/DCC.1991.213341},
eventdate = {1991-04-08/1991-04-11},
isbn = {0-8186-9202-2},
pages = {392-401},
title = {A better tree-structured vector quantizer},
venue = {Snowbird, UT, USA},
author = {Wu, Xiaolin and Zhang, Kaizhong},
editor = {Storer, James A. and Reif, John H.},
publisher = {IEEE Computer Society},
booktitle = {1991 Data Compression Conference},
date = {1991-04},
doi = {10.1109/DCC.1991.213341},
eventdate = {1991-04-08/1991-04-11},
isbn = {0-8186-9202-2},
pages = {392--401},
title = {A better tree-structured vector quantizer},
venue = {Snowbird, UT, USA},
}

@mvbook{Fontenelle1758,
Expand Down Expand Up @@ -2224,6 +2224,15 @@ @report{LongCappellariVelezFuchs
type = {techreport},
}

@report{Paoloni2010,
author = {Paoloni, Gabriele},
institution = {Intel Corporation},
date = {2010-09},
number = {324264-001},
title = {How to Benchmark Code Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures},
type = {White Paper},
}

@report{RiesBettadpurEanesKangKoMcCulloughNagelPiePooleRichterSaveTapley2016,
author = {Ries, J. and Bettadpur, S. and Eanes, R. and Kang, Z. and Ko, U. and McCullough, C. and Nagel, P. and Pie, N. and Poole, S. and Save, H. and Tapley, B.},
institution = {Center for Space Research at the University of Texas at Austin},
Expand Down
Binary file modified documentation/bibliography.pdf
Binary file not shown.
58 changes: 58 additions & 0 deletions nanobenchmarks/examples.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <emmintrin.h>

#include "nanobenchmarks/function_registry.hpp"
#include "numerics/cbrt.hpp"

namespace principia {
namespace nanobenchmarks {
namespace _examples {

using namespace principia::numerics::_cbrt;

// Returns twice its argument.  BENCHMARKED_FUNCTION declares the function as
// double twice(double x) and registers it under the name "twice".
BENCHMARKED_FUNCTION(twice) {
return 2 * x;
}

// Returns three times its argument.  Registered under the name "thrice".
BENCHMARKED_FUNCTION(thrice) {
return 3 * x;
}

// Returns its argument plus one.  Registered under the name "inc".
BENCHMARKED_FUNCTION(inc) {
return x + 1;
}

// Adds x to itself 4 times (5 operands, evaluated left to right), in the same
// way as add_16_times does with 16 additions.  Registered under the name
// "add_4_times"; the body must perform additions, not multiplications, for the
// reported measurement to match that name.
BENCHMARKED_FUNCTION(add_4_times) {
  return x + x + x + x + x;
}

// Adds x to itself 16 times: the expression has 17 operands and 16 additions,
// evaluated left to right.  Registered under the name "add_16_times".
BENCHMARKED_FUNCTION(add_16_times) {
return x + x + x + x + x + x + x + x + x + x + x + x + x + x + x + x + x;
}

// Returns the square root of x, computed with the SSE2 scalar intrinsics
// rather than a library call.  Registered under the name "square_root".
BENCHMARKED_FUNCTION(square_root) {
// Low lane holds x, high lane is zero.
__m128d x_0 = _mm_set_sd(x);
// Scalar square root of the low lane, extracted back to a double.
return _mm_cvtsd_f64(_mm_sqrt_sd(x_0, x_0));
}

// Returns the square root of the square root of x, as two scalar SSE2 square
// roots where the second one consumes the result of the first.  Registered
// under the name "sqrt_sqrt".
BENCHMARKED_FUNCTION(sqrt_sqrt) {
// Low lane holds x, high lane is zero.
__m128d x_0 = _mm_set_sd(x);
// First square root; its result feeds the second one below.
x_0 = _mm_sqrt_sd(x_0, x_0);
return _mm_cvtsd_f64(_mm_sqrt_sd(x_0, x_0));
}

// Returns x divided by the square root of x, using the SSE2 scalar intrinsics:
// a square root followed by a division that depends on it.  Registered under
// the name "square_root_division".
BENCHMARKED_FUNCTION(square_root_division) {
// Low lane holds x, high lane is zero.
__m128d x_0 = _mm_set_sd(x);
// Low lane of the result is x / sqrt(x).
return _mm_cvtsd_f64(_mm_div_sd(x_0, _mm_sqrt_sd(x_0, x_0)));
}
// Registers the existing function Cbrt under the name "Cbrt".  Presumably this
// is the Cbrt made visible by the using-directive for
// principia::numerics::_cbrt above -- confirm against numerics/cbrt.hpp.
BENCHMARK_FUNCTION(Cbrt);

using namespace principia::numerics::_cbrt::internal;

// Registers four explicit instantiations of Cbrt from the internal namespace.
// Each one is registered under the exact text of the macro argument, e.g.
// "method_3²ᴄZ5¹::Cbrt<Rounding::Faithful>".
// NOTE(review): the method_* namespaces and the Rounding enumeration are
// presumably declared in numerics/cbrt.hpp; they are not visible here.
BENCHMARK_FUNCTION(method_3²ᴄZ5¹::Cbrt<Rounding::Faithful>);
BENCHMARK_FUNCTION(method_3²ᴄZ5¹::Cbrt<Rounding::Correct>);
BENCHMARK_FUNCTION(method_5²Z4¹FMA::Cbrt<Rounding::Faithful>);
BENCHMARK_FUNCTION(method_5²Z4¹FMA::Cbrt<Rounding::Correct>);

} // namespace _examples
} // namespace nanobenchmarks
} // namespace principia
42 changes: 42 additions & 0 deletions nanobenchmarks/function_registry.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "nanobenchmarks/function_registry.hpp"

#include <functional>
#include <map>
#include <string>

#include "glog/logging.h"

namespace principia {
namespace nanobenchmarks {
namespace _function_registry {
namespace internal {

// Records `function` under `name` in both lookup tables of the singleton.
// A CHECK fails if `function` is already registered (under any name) or if
// `name` is already taken by another function.  Returns true unconditionally
// so that the result can be used to initialize a namespace-scope variable, as
// BENCHMARK_FUNCTION_WITH_NAME does.
bool FunctionRegistry::Register(std::string_view name,
                                BenchmarkedFunction function) {
  // There is no operator<< for function pointers: streamed directly, they
  // convert to bool and print as "1".  Go through void const* so that the
  // diagnostics below show the actual addresses.
  auto const address_of = [](BenchmarkedFunction const f) {
    return reinterpret_cast<void const*>(f);
  };
  CHECK(singleton_.names_by_function_.emplace(function, name).second)
      << " Registering function " << address_of(function) << " as " << name
      << ": "
      << "function already registered as "
      << singleton_.names_by_function_[function];
  CHECK(singleton_.functions_by_name_.emplace(name, function).second)
      << " Registering function " << address_of(function) << " as " << name
      << ": "
      << " name already taken by "
      << address_of(singleton_.functions_by_name_.find(name)->second);
  return true;
}

// The unique registry instance, allocated on the heap; nothing in this file
// deletes it.
// NOTE(review): this static data member is dynamically initialized, while the
// BENCHMARK_FUNCTION* macros call Register() from the initializers of
// namespace-scope variables in other translation units.  Confirm that this
// reference is guaranteed to be initialized before any such call runs.
FunctionRegistry& FunctionRegistry::singleton_ = *new FunctionRegistry();

std::map<std::string, BenchmarkedFunction, std::less<>> const&
FunctionRegistry::functions_by_name() {
return singleton_.functions_by_name_;
}

std::map<BenchmarkedFunction, std::string> const&
FunctionRegistry::names_by_function() {
return singleton_.names_by_function_;
}

} // namespace internal
} // namespace _function_registry
} // namespace nanobenchmarks
} // namespace principia
61 changes: 61 additions & 0 deletions nanobenchmarks/function_registry.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#pragma once

#include <functional>
#include <map>
#include <string>
#include <string_view>

#include "base/macros.hpp"

namespace principia {
namespace nanobenchmarks {
namespace _function_registry {
namespace internal {

// The signature shared by all functions that can be registered: a pointer to a
// function taking a double and returning a double.
using BenchmarkedFunction = double (*)(double);

// A process-wide registry associating names with benchmarked functions, in
// both directions.  All access goes through static members operating on a
// single private instance; the class cannot be constructed from the outside.
class FunctionRegistry {
public:
// Registers `function` under `name`.  A CHECK fails if either the function or
// the name is already registered.  Always returns true.
static bool Register(std::string_view name, BenchmarkedFunction function);
// The registered functions, keyed by name.  The transparent comparator
// std::less<> allows lookups by std::string_view without building a
// std::string.
static std::map<std::string, BenchmarkedFunction, std::less<>> const&
functions_by_name();
// The registered names, keyed by function pointer.
static std::map<BenchmarkedFunction, std::string> const& names_by_function();

private:
FunctionRegistry() = default;
// The unique instance, defined in function_registry.cpp.
static FunctionRegistry& singleton_;
// The two tables are filled together by Register.
std::map<std::string, BenchmarkedFunction, std::less<>> functions_by_name_;
std::map<BenchmarkedFunction, std::string> names_by_function_;
};

// BENCHMARK_FUNCTION_WITH_NAME(name, function) registers the function designated
// by the trailing arguments under the given name.  It expands to the definition
// of a bool variable in an unnamed namespace whose initializer calls
// FunctionRegistry::Register with `name` and the address of the function.
// The variable is named registered<line>, where <line> is the line of the macro
// invocation; the _INTERNAL levels of indirection ensure that __LINE__ is
// replaced by its numeric value before being pasted with ##.  Consequently two
// invocations on the same source line of the same translation unit would define
// the same variable twice.
// The function is taken as __VA_ARGS__ so that the designator may itself
// contain commas.
#define BENCHMARK_FUNCTION_WITH_NAME(name, ...) \
BENCHMARK_FUNCTION_WITH_NAME_INTERNAL(__LINE__, name, __VA_ARGS__)
#define BENCHMARK_FUNCTION_WITH_NAME_INTERNAL(line, name, ...) \
BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2(line, name, __VA_ARGS__)
#define BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2(line, name, ...) \
namespace { \
static bool registered##line = ::principia::nanobenchmarks:: \
_function_registry::FunctionRegistry::Register(name, &(__VA_ARGS__)); \
}


// BENCHMARK_FUNCTION(function) registers an already-declared function, using
// the stringized text of the macro argument as its name.
#define BENCHMARK_FUNCTION(...) \
BENCHMARK_FUNCTION_WITH_NAME(#__VA_ARGS__, __VA_ARGS__)

// BENCHMARKED_FUNCTION(f) { body } defines and registers a function in one go:
// it declares double f(double x), registers it with BENCHMARK_FUNCTION, and
// then opens the definition of f, so that the invocation must be immediately
// followed by the function body.  The argument is available in the body as x.
#define BENCHMARKED_FUNCTION(f) \
double f(double x); \
BENCHMARK_FUNCTION(f); \
double f(double x)

// BENCHMARK_EXTERN_C_FUNCTION(f) declares f as an extern "C" function taking
// and returning a double, and registers it with BENCHMARK_FUNCTION.
// NOTE(review): presumably intended for functions defined outside of C++
// sources (e.g., in assembly) -- confirm with the callers.
#define BENCHMARK_EXTERN_C_FUNCTION(f) \
extern "C" double f(double); \
BENCHMARK_FUNCTION(f)

} // namespace internal

using internal::BenchmarkedFunction;
using internal::FunctionRegistry;

} // namespace _function_registry
} // namespace nanobenchmarks
} // namespace principia
Loading

0 comments on commit 4e38e35

Please sign in to comment.