Skip to content

Commit

Permalink
Merge pull request #569 from hvdijk/improve-log2-extp
Browse files Browse the repository at this point in the history
Abacus: improve log2_extended_precision.
  • Loading branch information
hvdijk authored Oct 21, 2024
2 parents 62ef603 + ba49ea9 commit 5809479
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 164 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@

namespace abacus {
namespace internal {
// Assumes exponent of x >= exponent of y
template <typename T>
inline T add_exact(const T x, const T y, T *out_remainder) {
// Assumes exponent of x >= exponent of y
inline T add_exact_unsafe(const T x, const T y, T *out_remainder) {
const T s = x + y;
const T z = s - x;

Expand All @@ -34,15 +34,24 @@ inline T add_exact(const T x, const T y, T *out_remainder) {

// Order of x and y does not matter
template <typename T>
inline void add_exact_safe(T *x, T *y) {
const T s = *x + *y;
const T a = s - *y;
inline T add_exact(const T x, const T y, T *out_remainder) {
const T s = x + y;
const T a = s - y;
const T b = s - a;
const T da = *x - a;
const T db = *y - b;
const T da = x - a;
const T db = y - b;
const T t = da + db;
*x = s;
*y = t;

*out_remainder = t;
return s;
}

template <typename T>
inline void add_exact_unsafe(T *x, T *y) {
T r1_lo{};
const T r1_hi = add_exact_unsafe<T>(*x, *y, &r1_lo);
*x = r1_hi;
*y = r1_lo;
}

template <typename T>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,12 @@ static ABACUS_CONSTANT abacus_half
-0.1474609375f16};
#endif // __CA_BUILTINS_HALF_SUPPORT

// see maple worksheet for how coefficient was derived
static ABACUS_CONSTANT abacus_float
__codeplay_log2_extended_precision_coeff[16] = {
-.72134752044367709f, .48089834695927680f, -.36067376055899679f,
.28853900903746190f, -.24044913178516529f, .20609921341711487f,
-.18033920103731198f, .16030301033157655f, -.14420120724015226f,
.13106324614753728f, -.12135615255608116f, .11232690718087393f,
-0.92542744786243758e-1f, 0.84679695565939149e-1f, -.13981754775229332f,
.13540255679513207f};
// Aprroximation of log2(x+1) between [sqrt(0.5)-1;2*sqrt(0.5)-1]
// See log2_extended_precision.sollya for derivation
// The first terms are separated out to avoid double-precision arithmetic.
static ABACUS_CONSTANT abacus_float __codeplay_log2_extended_precision_coeff[] =
{0.33333301f, -0.25000026f, 0.20002578f, -0.16667923f, 0.14212508f,
-0.12400908f, 0.11926104f, -0.117190584f, 0.067263625f};
} // namespace

namespace abacus {
Expand All @@ -66,27 +63,40 @@ struct log2_extended_precision_helper<T, abacus_float> {
static T _(const T &xMant, T *out_remainder) {
T xMAnt1m = xMant - 1.0f;

T poly = abacus::internal::horner_polynomial(
xMAnt1m, __codeplay_log2_extended_precision_coeff);

T poly_times_x_lo;
T poly_times_x_hi =
abacus::internal::multiply_exact(poly, xMAnt1m, &poly_times_x_lo);

T total_sum_lo;
T total_sum_hi = abacus::internal::add_exact(
(T)1.44269502162933349609375f, poly_times_x_hi, &total_sum_lo);

total_sum_lo += poly_times_x_lo;
// total_sum_lo += 1.837066650390625e-8f; // a magic correction term
total_sum_lo += 1.92596333e-8f;

T result_lo;
T result_hi =
abacus::internal::multiply_exact(total_sum_hi, xMAnt1m, &result_lo);

*out_remainder = result_lo + (xMAnt1m * total_sum_lo);
return result_hi;
T hi = xMAnt1m * abacus::internal::horner_polynomial(
xMAnt1m, __codeplay_log2_extended_precision_coeff);
T lo = 0;

auto AddF = [](T &hi, T &lo, T val) {
abacus::internal::add_exact(&hi, &val);
abacus::internal::add_exact(&lo, &val);
};
auto MulF = [](T &hi, T &lo, T val) {
T mullo;
hi = abacus::internal::multiply_exact(hi, val, &mullo);
lo = __abacus_fma(val, lo, mullo);
};
auto MulD = [&AddF, &MulF](T &hi, T &lo, T hival, T loval) {
T hihi = hi, hilo = lo, lohi = hi, lolo = lo;
MulF(hihi, hilo, hival);
MulF(lohi, lolo, loval);

hi = lohi;
lo = lolo;
AddF(hi, lo, hilo);
AddF(hi, lo, hihi);
};

AddF(hi, lo, -0.5f);
AddF(hi, lo, 3.3101797e-09f);
MulF(hi, lo, xMAnt1m);
AddF(hi, lo, 1.f);
AddF(hi, lo, 6.3439454e-10f);
MulF(hi, lo, xMAnt1m);
MulD(hi, lo, 1.442695f, 1.925963e-08f);

*out_remainder = lo;
return hi;
}
};

Expand Down Expand Up @@ -161,12 +171,12 @@ T log2_extended_precision_half_unsafe(const T &x, T *ans_lo, T *xExp) {
T c_term_hi = 1.4423828125f16;
T c_term_lo = 0.000306606292724609375f16;

abacus::internal::add_exact(&c_term_hi, &poly_hi);
abacus::internal::add_exact_unsafe(&c_term_hi, &poly_hi);
// This adds in exactly, so no need for an add_exact
c_term_lo = c_term_lo + poly_lo;

abacus::internal::add_exact(&c_term_lo, &poly_hi);
abacus::internal::add_exact(&c_term_hi, &c_term_lo);
abacus::internal::add_exact_unsafe(&c_term_lo, &poly_hi);
abacus::internal::add_exact_unsafe(&c_term_hi, &c_term_lo);

// This adds in exactly, no need for add_exact
c_term_lo = c_term_lo + poly_hi;
Expand Down Expand Up @@ -249,12 +259,12 @@ T log2_extended_precision_half_safe(const T &x, T *ans_lo, T *hiExp, T *loExp) {
// 0.0098114 ==> 0.000306606292724609375 * 2^5
T c_term_lo = 0.0098114f16;

abacus::internal::add_exact(&c_term_hi, &poly_hi);
abacus::internal::add_exact_unsafe(&c_term_hi, &poly_hi);
// This adds in exactly, so no need for an add_exact
c_term_lo = c_term_lo + poly_lo;

abacus::internal::add_exact(&c_term_lo, &poly_hi);
abacus::internal::add_exact(&c_term_hi, &c_term_lo);
abacus::internal::add_exact_unsafe(&c_term_lo, &poly_hi);
abacus::internal::add_exact_unsafe(&c_term_hi, &c_term_lo);

// This adds in exactly, no need for add_exact
c_term_lo = c_term_lo + poly_hi;
Expand Down Expand Up @@ -301,102 +311,6 @@ T log2_extended_precision_half_safe(const T &x, T *ans_lo, T *hiExp, T *loExp) {
}

#endif // __CA_BUILTINS_HALF_SUPPORT

/*
If you ever need a more accurate log2 for use in pow this is one. I wrote it
when tests were failing in the other building but ended up fixing it a different
way.
inline float log_extended_precision(float xMant, float * out_remainder){
//New plan, get ln(xMAnt) first, then change to log2:
float xMant1m = xMant - 1.0f;
float poly = (-0.25f + (0.2f + (-0.16666573839961348e0f +
(0.14285549680165117e0f + (-0.12508235072240584e0f + (0.11122317723555506e0f +
(-0.97786162288840440e-1f + (0.88356744995684893e-1f + (-0.10603185816023552e0f
+ 0.10021792733479427e0f * xMant1m) * xMant1m) * xMant1m) * xMant1m) * xMant1m)
* xMant1m) * xMant1m) * xMant1m) *xMant1m) * xMant1m;
poly += 3.333333432674407958984375e-1f;
poly -= 9.934107462565104166666e-9; //To conensate for 1/3 not being
exact
//We need xMant1m*(xMant1m*(xMant1m*poly - 0.5) + 1) accurately now
//xMant1m*poly - 0.5:
float term1_lo;
float term1_hi = abacus::internal::multiply_exact(poly, xMant1m,
&term1_lo);
float mhalf = -0.5f;
abacus::internal::add_exact(&mhalf, &term1_hi);
abacus::internal::add_exact(&term1_hi, &term1_lo);
term1_lo = term1_hi;
term1_hi = mhalf;
//multiply by xMant1m:
float term2_lo;
float term2_hi = abacus::internal::multiply_exact(term1_hi, xMant1m,
&term2_lo);
float term2_lo_lo;
float term2_lo_hi = abacus::internal::multiply_exact(term1_lo, xMant1m,
&term2_lo_lo);
//Make sure all these get added up ok:
abacus::internal::add_exact(&term2_lo, &term2_lo_hi);
abacus::internal::add_exact(&term2_hi, &term2_lo);
abacus::internal::add_exact(&term2_lo_hi, &term2_lo_lo); //exact
abacus::internal::add_exact(&term2_lo, &term2_lo_hi);
abacus::internal::add_exact(&term2_hi, &term2_lo);
//Multiply by xMant again:
float term3_lo;
float term3_hi = abacus::internal::multiply_exact(term2_hi, xMant1m,
&term3_lo);
float term3_lo_lo;
float term3_lo_hi = abacus::internal::multiply_exact(term2_lo_hi,
xMant1m, &term3_lo_lo);
term3_lo_lo += term2_lo_hi*xMant1m;
abacus::internal::add_exact(&term2_lo_hi, &term3_lo_lo);
abacus::internal::add_exact(&term3_lo, &term2_lo_hi);
abacus::internal::add_exact(&term3_hi, &term3_lo);
//Add in xMant1m for the final answer
abacus::internal::add_exact(&xMant1m, &term3_hi);
abacus::internal::add_exact(&term3_hi, &term3_lo);
abacus::internal::add_exact(&term3_hi, &term2_lo_hi);
*out_remainder = term3_hi;
return xMant1m;
}
inline float log2_extended_precision(float xMant, float * out_remainder){
//Get the log first:
float ln_lo;
float ln_hi = log_extended_precision(xMant, &ln_lo);
//an accurate 1/ln(2) (both needed)
float recip_ln2_hi = 1.44269502162933349609375;
float recip_ln2_lo =
1.9259629911266174681001892137426645954152985934135449406931109219181185079885526622893506345e-8;
float log2_hi_lo;
float log2_hi_hi = abacus::internal::multiply_exact(ln_hi, recip_ln2_hi,
&log2_hi_lo);
*out_remainder = log2_hi_lo + (ln_lo*recip_ln2_hi + ln_hi*
recip_ln2_lo);
return log2_hi_hi;
}
*/
} // namespace internal
} // namespace abacus

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ inline T log_extended_precision(const T &xMant, T *out_remainder) {
T first_term_hi =
abacus::internal::multiply_exact_unsafe(xMant1m, poly, &first_term_lo);

abacus::internal::add_exact(&first_term_const, &first_term_hi);
abacus::internal::add_exact_unsafe(&first_term_const, &first_term_hi);
const T small_correction_term = xMant1m * error_in_1_over_3;

first_term_hi += (first_term_lo + small_correction_term);
Expand All @@ -102,7 +102,7 @@ inline T log_extended_precision(const T &xMant, T *out_remainder) {
xMant1m, first_term_lo, &second_term_lo_lo);

second_term_hi_lo += second_term_lo_hi; // Exact
abacus::internal::add_exact(&second_term_const, &second_term_hi_hi);
abacus::internal::add_exact_unsafe(&second_term_const, &second_term_hi_hi);
second_term_hi_hi += second_term_hi_lo; // Exact

//--------------------------------------------------------------------------
Expand All @@ -116,10 +116,10 @@ inline T log_extended_precision(const T &xMant, T *out_remainder) {
T third_term_lo_hi = abacus::internal::multiply_exact_unsafe(
xMant1m, second_term_hi_hi, &third_term_lo_lo);

abacus::internal::add_exact(&third_term_hi_lo, &third_term_lo_hi);
abacus::internal::add_exact_unsafe(&third_term_hi_lo, &third_term_lo_hi);

abacus::internal::add_exact(&third_term_const, &third_term_hi_hi);
abacus::internal::add_exact(&third_term_hi_hi, &third_term_hi_lo);
abacus::internal::add_exact_unsafe(&third_term_const, &third_term_hi_hi);
abacus::internal::add_exact_unsafe(&third_term_hi_hi, &third_term_hi_lo);

third_term_hi_lo = third_term_lo_hi;
third_term_lo_hi = third_term_lo_lo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ inline T multiply_extended_precision(
const SignedType n_lo = n & 0x0000FFFF;

T high_sum_lo;
const T high_sum_hi =
abacus::internal::add_exact<>(xExp_abacus_float, input_hi, &high_sum_lo);
const T high_sum_hi = abacus::internal::add_exact_unsafe<>(
xExp_abacus_float, input_hi, &high_sum_lo);

high_sum_lo += input_lo;

Expand All @@ -55,14 +55,14 @@ inline T multiply_extended_precision(

T more_sum_lo;
const T more_sum_hi =
abacus::internal::add_exact<>(term1_hi, term1_lo, &more_sum_lo);
abacus::internal::add_exact_unsafe<>(term1_hi, term1_lo, &more_sum_lo);

const T half_sum = (more_sum_lo + term2_lo) +
(abacus::detail::cast::convert<T>(n) * high_sum_lo);

T final_sum_lo;
const T final_sum_hi =
abacus::internal::add_exact<>(more_sum_hi, half_sum, &final_sum_lo);
const T final_sum_hi = abacus::internal::add_exact_unsafe<>(
more_sum_hi, half_sum, &final_sum_lo);

const T total_floor = __abacus_floor(final_sum_hi);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ struct pow_unsafe_helper<T, abacus_float> {
SignedType xExp;
T xMant = __abacus_frexp(x, &xExp);

const SignedType cond = xMant <= 0.671092f;
const SignedType cond = xMant <= 0.70710678f;
xMant = __abacus_select(xMant, xMant * 2.0f, cond);
xExp = __abacus_select(xExp, xExp - 1, cond);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,14 @@ Poly;

Err=dirtyinfnorm(Poly-Func, Range);
print("\ninf-norm error:", Err);

print("Single log2 with extended precision for use in pow:");

Range=[sqrt(0.5)-1;2*sqrt(0.5)-1];
Func=log(x+1);
Order=[|1,2,3,4,5,6,7,8,9,10,11|];
Poly=fpminimax(Func, Order, [|double, double, single...|], Range, floating, relative);
Poly;

Err=dirtyinfnorm(Poly-Func, Range);
print("\ninf-norm error:", Err);
6 changes: 3 additions & 3 deletions modules/compiler/builtins/abacus/source/abacus_math/acos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,10 @@ T ABACUS_API acos_half(T x) {
abacus::internal::multiply_exact<T>(x2, __codeplay_acos_2[1], &mul_lo);
// For all possible inputs of acos, we have tested that exponent of
// __codeplay_acos_2[0] is >= expoonent of mul_hi, so therefore it's safe to
// use add_exact instead of add_exact_safe.
// use add_exact_unsafe instead of add_exact.
T mul_add_lo;
const T mul_add_hi =
abacus::internal::add_exact<T>(__codeplay_acos_2[0], mul_hi, &mul_add_lo);
const T mul_add_hi = abacus::internal::add_exact_unsafe<T>(
__codeplay_acos_2[0], mul_hi, &mul_add_lo);
mul_add_lo = mul_add_lo + mul_lo;

// Multiply by xAbs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ T atan2pi_horner_polynomial(const T x2) {
// `add_exact`, then computing the rest of the horner polynomial by hand gives
// us some extra precision.
T first_iter_lo;
const T first_iter_hi = abacus::internal::add_exact<T>(
const T first_iter_hi = abacus::internal::add_exact_unsafe<T>(
_atan2piH[3], x2 * _atan2piH[4], &first_iter_lo);

T poly = _atan2piH[2] + ((x2 * first_iter_lo) + (x2 * first_iter_hi));
Expand Down
4 changes: 2 additions & 2 deletions modules/compiler/builtins/abacus/source/abacus_math/cospi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ struct helper<T, abacus_half> {

// For all possible inputs of cospi, we have tested that exponent of
// polynomial[0] is >= expoonent of poly_mul_hi, so therefore it's safe to
// use add_exact instead of add_exact_safe.
// use add_exact_unsafe instead of add_exact.
T poly_mul_add_lo;
const T poly_mul_add_hi = abacus::internal::add_exact<T>(
const T poly_mul_add_hi = abacus::internal::add_exact_unsafe<T>(
polynomial[0], poly_mul_hi, &poly_mul_add_lo);
poly_mul_add_lo += poly_mul_lo;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <abacus/abacus_config.h>
#include <abacus/abacus_math.h>
#include <abacus/internal/add_exact.h>
#include <abacus/internal/exp_unsafe.h>
#include <abacus/internal/horner_polynomial.h>
#include <abacus/internal/multiply_exact.h>
Expand Down
Loading

0 comments on commit 5809479

Please sign in to comment.