Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cr_hypot from core-math #322

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/compiler-builtins-smoke-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@

#[path = "../../../src/math/mod.rs"]
pub mod libm;

mod math {
pub use super::libm::*;
}
14 changes: 14 additions & 0 deletions crates/libm-test/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ mod musl_reference_tests {
let files = fs::read_dir(math_src)
.unwrap()
.map(|f| f.unwrap().path())
.filter(file_needs_test)
.collect::<Vec<_>>();

let mut math = Vec::new();
Expand Down Expand Up @@ -94,6 +95,19 @@ mod musl_reference_tests {
generate_unit_tests(&math);
}

/// Check whether a path within `src/math` should get tests generated.
fn file_needs_test(path: &PathBuf) -> bool {
// Skip directories
if path.is_dir() {
return false;
}

let fname = path.file_name().unwrap().to_str().unwrap();

// Musl doesn't support `f16` or `f128`
!(fname.contains("f16") || fname.contains("f128"))
}

/// A "poor man's" parser for the signature of a function
fn parse(s: &str) -> Function {
let s = eat(s, "pub fn ");
Expand Down
7 changes: 5 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! libm in pure Rust
#![no_std]
#![cfg_attr(feature = "unstable", allow(internal_features))]
#![cfg_attr(feature = "unstable", feature(core_intrinsics))]
// #![cfg_attr(feature = "unstable", allow(internal_features))]
// #![cfg_attr(feature = "unstable", feature(core_intrinsics))]
#![allow(clippy::assign_op_pattern)]
#![allow(clippy::deprecated_cfg_attr)]
#![allow(clippy::eq_op)]
Expand All @@ -11,6 +11,9 @@
#![allow(clippy::mixed_case_hex_literals)]
#![allow(clippy::needless_return)]
#![allow(clippy::unreadable_literal)]
#![allow(unused)]
#![feature(core_intrinsics)]
#![allow(internal_features)]

mod libm_helper;
mod math;
Expand Down
302 changes: 289 additions & 13 deletions src/math/hypot.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
/*
Copyright (c) 2022 Alexei Sibidanov.

This file is part of the CORE-MATH project
(https://core-math.gitlabpages.inria.fr/).
*/

use core::f64;

use super::sqrt;

const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1

fn sq(x: f64) -> (f64, f64) {
let xh: f64;
let xl: f64;
let xc: f64;

xc = x * SPLIT;
xh = x - xc + xc;
xl = x - xh;
let hi = x * x;
let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
(hi, lo)
}

#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn hypot(mut x: f64, mut y: f64) -> f64 {
if true {
return cr_hypot(x, y);
}

let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700

Expand Down Expand Up @@ -72,3 +70,281 @@ pub fn hypot(mut x: f64, mut y: f64) -> f64 {
let (hy, ly) = sq(y);
z * sqrt(ly + lx + hy + hx)
}

fn sq(x: f64) -> (f64, f64) {
let xh: f64;
let xl: f64;
let xc: f64;

xc = x * SPLIT;
xh = x - xc + xc;
xl = x - xh;
let hi = x * x;
let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
(hi, lo)
}

fn cr_hypot(mut x: f64, mut y: f64) -> f64 {
let flag = get_flags();

let xi = x.to_bits();
let yi = y.to_bits();

let emsk: u64 = 0x7ffu64 << 52;
let mut ex: u64 = xi & emsk;
let mut ey: u64 = yi & emsk;
/* emsk corresponds to the upper bits of NaN and Inf (apart the sign bit) */
x = __builtin_fabs(x);
y = __builtin_fabs(y);
if __builtin_expect(ex == emsk || ey == emsk, false) {
/* Either x or y is NaN or Inf */
let wx: u64 = xi << 1;
let wy: u64 = yi << 1;
let wm: u64 = emsk << 1;
let ninf: i32 = ((wx == wm) ^ (wy == wm)) as i32;
let nqnn: i32 = (((wx >> 52) == 0xfff) ^ ((wy >> 52) == 0xfff)) as i32;
/* ninf is 1 when only one of x and y is +/-Inf
nqnn is 1 when only one of x and y is qNaN
IEEE 754 says that hypot(+/-Inf,qNaN)=hypot(qNaN,+/-Inf)=+Inf. */
if ninf > 0 && nqnn > 0 {
return f64::INFINITY;
}
return x + y; /* inf, nan */
}

let u: f64 = x.max(y);
let v: f64 = x.min(y);
let mut xd: u64 = u.to_bits();
let mut yd: u64 = v.to_bits();
ey = yd;
if __builtin_expect(ey >> 52 == 0, false) {
if yd == 0 {
return f64::from_bits(xd);
}
ex = xd;
if __builtin_expect(ex >> 52 == 0, false) {
if ex == 0 {
return 0.0;
}
return as_hypot_denorm(ex, ey);
}

let nz: u32 = ey.leading_zeros();
ey <<= nz - 11;
ey &= u64::MAX >> 12;
ey -= ((nz as i64 - 12i64) << 52) as u64;
let t = ey; // why did they do this?
yd = t;
}

let de: u64 = xd - yd;
if __builtin_expect(de > (27_u64 << 52), false) {
return __builtin_fma(hf64!("0x1p-27"), v, u);
}

let off: i64 = (0x3ff_i64 << 52) - (xd & emsk) as i64;
xd += off as u64;
yd += off as u64;
x = f64::from_bits(xd);
y = f64::from_bits(yd);
let x2: f64 = x * x;
let dx2: f64 = __builtin_fma(x, x, -x2);
let y2: f64 = y * y;
let dy2: f64 = __builtin_fma(y, y, -y2);
let r2: f64 = x2 + y2;
let ir2: f64 = 0.5 / r2;
let dr2: f64 = ((x2 - r2) + y2) + (dx2 + dy2);
let mut th: f64 = sqrt(r2);
let rsqrt: f64 = th * ir2;
let dz: f64 = dr2 - __builtin_fma(th, th, -r2);
let mut tl: f64 = rsqrt * dz;
th = fasttwosum(th, tl, &mut tl);
let mut thd: u64 = th.to_bits();
let tld = __builtin_fabs(tl).to_bits();
ex = thd;
ey = tld;
ex &= 0x7ff_u64 << 52;
let aidr: u64 = ey + (0x3fe_u64 << 52) - ex;
let mid: u64 = (aidr.wrapping_sub(0x3c90000000000000) + 16) >> 5;
if __builtin_expect(
mid == 0 || aidr < 0x39b0000000000000_u64 || aidr > 0x3c9fffffffffff80_u64,
false,
) {
thd = as_hypot_hard(x, y, flag).to_bits();
}
thd -= off as u64;
if __builtin_expect(thd >= (0x7ff_u64 << 52), false) {
return as_hypot_overflow();
}

f64::from_bits(thd)
}

fn fasttwosum(x: f64, y: f64, e: &mut f64) -> f64 {
let s: f64 = x + y;
let z: f64 = s - x;
*e = y - z;
s
}

fn as_hypot_overflow() -> f64 {
let z: f64 = hf64!("0x1.fffffffffffffp1023");
let f = z + z;
if f > z {
// errno = ERANGE
}
f
}

/* Here the square root is refined by Newton iterations: x^2+y^2 is exact
and fits in a 128-bit integer, so the approximation is squared (which
also fits in a 128-bit integer), compared and adjusted if necessary using
the exact value of x^2+y^2. */
fn as_hypot_hard(x: f64, y: f64, flag: FExcept) -> f64 {
let op: f64 = 1.0 + hf64!("0x1p-54");
let om: f64 = 1.0 - hf64!("0x1p-54");
let mut xi: u64 = x.to_bits();
let yi: u64 = y.to_bits();
let mut bm: u64 = (xi & (u64::MAX >> 12)) | 1u64 << 52;
let mut lm: u64 = (yi & (u64::MAX >> 12)) | 1u64 << 52;
let be: i32 = (xi >> 52) as i32;
let le: i32 = (yi >> 52) as i32;
let ri: u64 = sqrt(x * x + y * y).to_bits();
let bs: i32 = 2;
let mut rm: u64 = ri & (u64::MAX >> 12);
let mut re: i32 = ((ri >> 52) - 0x3ff) as i32;
rm |= 1u64 << 52;

for _ in 0..3 {
if __builtin_expect(rm == 1u64 << 52, true) {
rm = u64::MAX >> 11;
re -= 1;
} else {
rm -= 1;
}
}
bm <<= bs;
let mut m2: u64 = bm * bm;
let de: i32 = be - le;
let mut ls: i32 = bs - de;
if __builtin_expect(ls >= 0, true) {
lm <<= ls;
m2 += lm * lm;
} else {
let lm2: u128 = (lm as u128) * (lm as u128);
ls *= 2;
m2 += (lm2 >> -ls) as u64;
m2 |= ((lm2 << (128 + ls)) != 0) as u64;
}
let k: i32 = bs + re;
let mut d: i64;
loop {
rm += 1 + (rm >= (1u64 << 53)) as u64;
let tm: u64 = rm << k;
let rm2: u64 = tm * tm;
d = m2 as i64 - rm2 as i64;

if d <= 0 {
break;
}
}

if d == 0 {
set_flags(flag);
} else {
if __builtin_expect(op == om, true) {
let tm: u64 = (rm << k) - (1 << (k - (rm <= (1u64 << 53)) as i32));
d = m2 as i64 - (tm * tm) as i64;

if __builtin_expect(d != 0, true) {
rm += d as u64 >> 63;
} else {
rm -= rm & 1;
}
} else {
rm -= ((op == 1.0) as u64) << (rm > (1u64 << 53)) as u32;
}
}
if rm >= (1u64 << 53) {
rm >>= 1;
re += 1;
}

let e: u64 = (be - 1 + re) as u64;
xi = (e << 52) + rm;

f64::from_bits(xi)
}

fn as_hypot_denorm(mut a: u64, mut b: u64) -> f64 {
let op: f64 = 1.0 + hf64!("0x1p-54");
let om: f64 = 1.0 - hf64!("0x1p-54");
let af: f64 = a as f64;
let bf: f64 = b as f64;
a <<= 1;
b <<= 1;
// Is this casting right?
let mut rm: u64 = sqrt(af * af + bf * bf) as u64;
let tm: u64 = rm << 1;
let mut d: i64 = ((a * a) as i64) - ((tm * tm) as i64) + ((b * b) as i64);
let s_d: i64 = d >> 63;
let um: i64 = ((rm as i64) ^ s_d) - s_d;
let mut drm: i64 = s_d + 1;
let mut dd: i64 = (um << 3) + 4;
let mut p_d: i64;
rm -= drm as u64;
drm += s_d;
loop {
p_d = d;
rm += drm as u64;
d -= dd;
dd += 8;
if !__builtin_expect((d ^ p_d) > 0, false) {
break;
}
}
p_d = (s_d & d) + (!s_d & p_d);
if __builtin_expect(p_d != 0, true) {
if __builtin_expect(op == om, true) {
let sum: i64 = p_d - 4 * (rm as i64) - 1;
if __builtin_expect(sum != 0, true) {
rm += (sum as u64 >> 63) + 1;
} else {
rm += rm & 1;
}
} else {
rm += (op > 1.0) as u64;
}
}
let xi: u64 = rm;
f64::from_bits(xi)
}

fn __builtin_expect<T>(v: T, _exp: T) -> T {
v
}

fn __builtin_fabs(x: f64) -> f64 {
unsafe { core::intrinsics::fabsf64(x) }
}

fn __builtin_copysign(x: f64, y: f64) -> f64 {
unsafe { core::intrinsics::copysignf64(x, y) }
}

fn __builtin_fma(x: f64, y: f64, z: f64) -> f64 {
unsafe { core::intrinsics::fmaf64(x, y, z) }
}

type FExcept = u32;

fn get_rounding_mode(_flag: &mut FExcept) -> i32 {
// Always nearest
0
}

fn set_flags(_flag: FExcept) {}

fn get_flags() -> FExcept {
0
}
Loading