Skip to content

Commit

Permalink
Merge pull request #4016 from pleroy/CoeffRounding
Browse files Browse the repository at this point in the history
Round the polynomial coefficients using Mathematica and take advantage of powers of two
pleroy authored Jun 3, 2024
2 parents 9fb3103 + 31a00be commit 47ea231
Showing 4 changed files with 137 additions and 43 deletions.
68 changes: 27 additions & 41 deletions benchmarks/elementary_functions_experiments_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -152,7 +152,9 @@ class SingleTableImplementation {
Value cos_x;
};

static constexpr Value cos_polynomial_0 = -0.499999999999999999999872434553;
// If this was ever changed to a value that is not a power of 2, extra care
// would be needed in the computations that use it.
static constexpr Value cos_polynomial_0 = -0.5;

Value SinPolynomial(Argument x);
Value CosPolynomial1(Argument x);
@@ -214,27 +216,22 @@ Value TableSpacingImplementation<table_spacing>::SinPolynomial(
Argument const x) {
if constexpr (table_spacing == 2.0 / 256.0) {
// 71 bits.
return -0.166666666666666666666421797625 +
0.00833333057503280528178543245797 * x;
return -0x1.5555555555555p-3 + 0x1.11110B24ACC74p-7 * x;
} else if constexpr (table_spacing == 2.0 / 1024.0) {
// 85 bits.
return -0.166666666666666666666666651721 +
0.00833333316093951937646271666739 * x;
// 84 bits.
return -0x1.5555555555555p-3 + 0x1.111110B24ACB5p-7 * x;
}
}

template<Argument table_spacing>
Value TableSpacingImplementation<table_spacing>::CosPolynomial(
Argument const x) {
if constexpr (table_spacing == 2.0 / 256.0) {
// 77 bits.
return -0.499999999999999999999999974543 +
x * (0.0416666666666633318024480868405 -
0.00138888829905860875255146938745 * x);
// 83 bits.
return -0.5 + x * (0x1.5555555555555p-5 - 0x1.6C16BB6B46CA6p-10 * x);
} else if constexpr (table_spacing == 2.0 / 1024.0) {
// 72 bits.
return -0.499999999999999999999872434553 +
0.0416666654823785864634569932662 * x;
return -0.5 + 0x1.555554B290E6Ap-5 * x;
}
}

@@ -324,9 +321,8 @@ void MultiTableImplementation::SelectCutoff(Argument const x,
}

Value MultiTableImplementation::SinPolynomial(Argument const x) {
// 85 bits. Works for all binades.
return -0.166666666666666666666666651721 +
0.00833333316093951937646271666739 * x;
// 84 bits. Works for all binades.
return -0x1.5555555555555p-3 + 0x1.111110B24ACB5p-7 * x;
}

Value MultiTableImplementation::CosPolynomial(std::int64_t const i,
@@ -335,17 +331,14 @@ Value MultiTableImplementation::CosPolynomial(std::int64_t const i,
// we have to use 3 polynomials.
// i == 1 goes first because it is the largest argument interval.
if (i == 1) {
// 78 bits.
return -0.499999999999999999999998006790 +
0.0416666663705946726372008045758 * x;
// 76 bits.
return -0.5 + 0x1.5555552CA439Ep-5 * x;
} else if (i == 0) {
// 72 bits.
return -0.499999999999999999999872434553 +
0.0416666654823785864634569932662 * x;
return -0.5 + 0x1.555554B290E6Ap-5 * x;
} else {
// 84 bits.
return -0.499999999999999999999999968856 +
0.0416666665926486697856340784849 * x;
// 78 bits.
return -0.5 + 0x1.5555554B290E8p-5 * x;
}
}

@@ -379,8 +372,7 @@ Value SingleTableImplementation::Sin(Argument const x) {
// TODO(phl): Error analysis of this computation.
auto const h² = TwoProduct(h, h);
auto const h³ = h².value * h;
auto const h²_sin_x₀_cos_polynomial_0 =
h² * TwoProduct(sin_x₀, cos_polynomial_0);
auto const h²_sin_x₀_cos_polynomial_0 = h² * (sin_x₀ * cos_polynomial_0);
auto const terms_up_to_h² = QuickTwoSum(sin_x₀_plus_h_cos_x₀.value,
h²_sin_x₀_cos_polynomial_0.value);
return terms_up_to_h².value +
@@ -409,8 +401,7 @@ Value SingleTableImplementation::Cos(Argument const x) {
// TODO(phl): Error analysis of this computation.
auto const h² = TwoProduct(h, h);
auto const h³ = h².value * h;
auto const h²_cos_x₀_cos_polynomial_0 =
h² * TwoProduct(cos_x₀, cos_polynomial_0);
auto const h²_cos_x₀_cos_polynomial_0 = h² * (cos_x₀ * cos_polynomial_0);
auto const terms_up_to_h² = QuickTwoSum(cos_x₀_minus_h_sin_x₀.value,
h²_cos_x₀_cos_polynomial_0.value);
return terms_up_to_h².value +
@@ -420,24 +411,19 @@ Value SingleTableImplementation::Cos(Argument const x) {
}
}

Value SingleTableImplementation::SinPolynomial(
Argument const x) {
// 85 bits.
return -0.166666666666666666666666651721 +
0.00833333316093951937646271666739 * x;
Value SingleTableImplementation::SinPolynomial(Argument const x) {
// 84 bits. Works for all binades.
return -0x1.5555555555555p-3 + 0x1.111110B24ACB5p-7 * x;
}

Value SingleTableImplementation::CosPolynomial1(
Argument const x) {
Value SingleTableImplementation::CosPolynomial1(Argument const x) {
// 72 bits.
return cos_polynomial_0 + 0.0416666654823785864634569932662 * x;
return cos_polynomial_0 + 0x1.555554B290E6Ap-5 * x;
}

Value SingleTableImplementation::CosPolynomial2(
Argument const x) {
// 101 bits.
return x * (0.04166666666666665363986848039146102332933 -
0.001388888852024502693312293343727757316234 * x);
Value SingleTableImplementation::CosPolynomial2(Argument const x) {
// 97 bits.
return x * (0x1.5555555555555p-5 - 0x1.6C16C10C09C11p-10 * x);
}

template<Argument table_spacing>
@@ -461,7 +447,7 @@ void BM_ExperimentSinTableSpacing(benchmark::State& state) {
// The implementation is not accurate, but let's check that it's not
// broken.
auto const absolute_error = Abs(v[i] - std::sin(a[i]));
CHECK_LT(absolute_error, 5.6e-17);
CHECK_LT(absolute_error, 1.2e-16);
#endif
}
benchmark::DoNotOptimize(v);
6 changes: 4 additions & 2 deletions mathematica/floating_point.wl
Original file line number Diff line number Diff line change
@@ -36,9 +36,11 @@ Bits;


HexLiteral;
Attributes[HexLiteral]={Listable}


CorrectlyRound;
Attributes[CorrectlyRound]={Listable};


Begin["`Private`"]
@@ -101,7 +103,7 @@ leastFullHexDigitValue:=2^(significandBits-1)/16^fullHexDigits
leastHexDigitValue:=If[leastFullHexDigitValue>1,leastFullHexDigitValue/16,leastFullHexDigitValue]


HexLiteral[n_]:=If[n<0,"-",""]<>
HexLiteral[n_]:=If[n==0,"0.0",If[n<0,"-",""]<>
"0x1."<>ToUpperCase[
IntegerString[
Mod[IntegerPart[Representation[Abs[n]]/leastFullHexDigitValue],16^fullHexDigits],16,fullHexDigits]<>
@@ -120,7 +122,7 @@ Switch[
binary32,"f",
binary64,"",
x87extended,"l",
_,"_"<>ToString[significandBits]<>"_sigbits"]
_,"_"<>ToString[significandBits]<>"_sigbits"]]


smol=-12;
32 changes: 32 additions & 0 deletions numerics/double_precision.hpp
Original file line number Diff line number Diff line change
@@ -126,18 +126,50 @@ DoublePrecision<Difference<T>> operator+(DoublePrecision<T> const& left);
template<typename T>
DoublePrecision<Difference<T>> operator-(DoublePrecision<T> const& left);

template<typename T, typename U>
DoublePrecision<Sum<T, U>> operator+(T const& left,
DoublePrecision<U> const& right);

template<typename T, typename U>
DoublePrecision<Sum<T, U>> operator+(DoublePrecision<T> const& left,
U const& right);

template<typename T, typename U>
DoublePrecision<Sum<T, U>> operator+(DoublePrecision<T> const& left,
DoublePrecision<U> const& right);

template<typename T, typename U>
DoublePrecision<Difference<T, U>> operator-(T const& left,
DoublePrecision<U> const& right);

template<typename T, typename U>
DoublePrecision<Difference<T, U>> operator-(DoublePrecision<T> const& left,
U const& right);

template<typename T, typename U>
DoublePrecision<Difference<T, U>> operator-(DoublePrecision<T> const& left,
DoublePrecision<U> const& right);

template<typename T, typename U>
DoublePrecision<Product<T, U>> operator*(T const& left,
DoublePrecision<U> const& right);

template<typename T, typename U>
DoublePrecision<Product<T, U>> operator*(DoublePrecision<T> const& left,
U const& right);

template<typename T, typename U>
DoublePrecision<Product<T, U>> operator*(DoublePrecision<T> const& left,
DoublePrecision<U> const& right);

template<typename T, typename U>
DoublePrecision<Quotient<T, U>> operator/(T const& left,
DoublePrecision<U> const& right);

template<typename T, typename U>
DoublePrecision<Quotient<T, U>> operator/(DoublePrecision<T> const& left,
U const& right);

template<typename T, typename U>
DoublePrecision<Quotient<T, U>> operator/(DoublePrecision<T> const& left,
DoublePrecision<U> const& right);
74 changes: 74 additions & 0 deletions numerics/double_precision_body.hpp
Original file line number Diff line number Diff line change
@@ -396,6 +396,22 @@ DoublePrecision<Difference<T>> operator-(DoublePrecision<T> const& left) {
return result;
}

template<typename T, typename U>
DoublePrecision<Sum<T, U>> operator+(T const& left,
DoublePrecision<U> const& right) {
// [Lin81], algorithm longadd.
auto const sum = TwoSum(left, right.value);
return QuickTwoSum(sum.value, sum.error + right.error);
}

template<typename T, typename U>
DoublePrecision<Sum<T, U>> operator+(DoublePrecision<T> const& left,
U const& right) {
// [Lin81], algorithm longadd.
auto const sum = TwoSum(left.value, right);
return QuickTwoSum(sum.value, sum.error + left.error);
}

template<typename T, typename U>
DoublePrecision<Sum<T, U>> operator+(DoublePrecision<T> const& left,
DoublePrecision<U> const& right) {
@@ -404,6 +420,22 @@ DoublePrecision<Sum<T, U>> operator+(DoublePrecision<T> const& left,
return QuickTwoSum(sum.value, (sum.error + left.error) + right.error);
}

template<typename T, typename U>
DoublePrecision<Difference<T, U>> operator-(T const& left,
DoublePrecision<U> const& right) {
// [Lin81], algorithm longadd.
auto const sum = TwoDifference(left, right.value);
return QuickTwoSum(sum.value, sum.error - right.error);
}

template<typename T, typename U>
DoublePrecision<Difference<T, U>> operator-(DoublePrecision<T> const& left,
U const& right) {
// [Lin81], algorithm longadd.
auto const sum = TwoDifference(left.value, right);
return QuickTwoSum(sum.value, sum.error + left.error);
}

template<typename T, typename U>
DoublePrecision<Difference<T, U>> operator-(DoublePrecision<T> const& left,
DoublePrecision<U> const& right) {
@@ -412,6 +444,26 @@ DoublePrecision<Difference<T, U>> operator-(DoublePrecision<T> const& left,
return QuickTwoSum(sum.value, (sum.error + left.error) - right.error);
}

template<typename T, typename U>
FORCE_INLINE(inline)
DoublePrecision<Product<T, U>> operator*(T const& left,
DoublePrecision<U> const& right) {
// [Lin81], algorithm longmul.
auto product = TwoProduct(left, right.value);
product.error += left.value * right.error;
return QuickTwoSum(product.value, product.error);
}

template<typename T, typename U>
FORCE_INLINE(inline)
DoublePrecision<Product<T, U>> operator*(DoublePrecision<T> const& left,
U const& right) {
// [Lin81], algorithm longmul.
auto product = TwoProduct(left.value, right);
product.error += +left.error * right;
return QuickTwoSum(product.value, product.error);
}

template<typename T, typename U>
FORCE_INLINE(inline)
DoublePrecision<Product<T, U>> operator*(DoublePrecision<T> const& left,
@@ -423,6 +475,28 @@ DoublePrecision<Product<T, U>> operator*(DoublePrecision<T> const& left,
return QuickTwoSum(product.value, product.error);
}

template<typename T, typename U>
DoublePrecision<Quotient<T, U>> operator/(T const& left,
DoublePrecision<U> const& right) {
// [Lin81], algorithm longdiv.
auto const z = left / right.value;
auto const product = TwoProduct(right.value, z);
auto const zz = (((left - product.value) - product.error) - z * right.error) /
(right.value + right.error);
return QuickTwoSum(z, zz);
}

template<typename T, typename U>
DoublePrecision<Quotient<T, U>> operator/(DoublePrecision<T> const& left,
U const& right) {
// [Lin81], algorithm longdiv.
auto const z = left.value / right;
auto const product = TwoProduct(right, z);
auto const zz =
(((left.value - product.value) - product.error) + left.error) / right;
return QuickTwoSum(z, zz);
}

template<typename T, typename U>
DoublePrecision<Quotient<T, U>> operator/(DoublePrecision<T> const& left,
DoublePrecision<U> const& right) {

0 comments on commit 47ea231

Please sign in to comment.