From 04464c41884b0174e60a5131803ca33a9bf598e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=91=D1=80=D0=B0=D0=BD=D0=B8=D0=BC=D0=B8=D1=80=20=D0=9A?=
 =?UTF-8?q?=D0=B0=D1=80=D0=B0=D1=9F=D0=B8=D1=9B?=
 <branimirkaradzic@gmail.com>
Date: Tue, 19 Nov 2024 22:23:44 -0800
Subject: [PATCH] Fixed Rgb9E5F decoding. Removed union cast in favor of
 bx::bitCast.

---
 include/bx/inline/allocator.inl   | 11 ++++---
 include/bx/inline/pixelformat.inl | 43 ++++++++++++++-------------
 include/bx/inline/uint32_t.inl    | 49 +++++++++++++++----------------
 src/dtoa.cpp                      | 33 +++++++--------------
 tests/pixelformat_test.cpp        | 31 +++++++++++++++++++
 5 files changed, 91 insertions(+), 76 deletions(-)
 create mode 100644 tests/pixelformat_test.cpp

diff --git a/include/bx/inline/allocator.inl b/include/bx/inline/allocator.inl
index b237be525..646ebbc04 100644
--- a/include/bx/inline/allocator.inl
+++ b/include/bx/inline/allocator.inl
@@ -24,12 +24,11 @@ namespace bx
 
 	inline void* alignPtr(void* _ptr, size_t _extra, size_t _align)
 	{
-		union { void* ptr; uintptr_t addr; } un;
-		un.ptr = _ptr;
-		uintptr_t unaligned = un.addr + _extra; // space for header
-		uintptr_t aligned = bx::alignUp(unaligned, int32_t(_align) );
-		un.addr = aligned;
-		return un.ptr;
+		const uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		const uintptr_t unaligned = addr + _extra; // space for header
+		const uintptr_t aligned = bx::alignUp(unaligned, int32_t(_align) );
+
+		return bitCast<void*>(aligned);
 	}
 
 	inline void* alloc(AllocatorI* _allocator, size_t _size, size_t _align, const Location& _location)
diff --git a/include/bx/inline/pixelformat.inl b/include/bx/inline/pixelformat.inl
index d07be313e..1d62bfd2f 100644
--- a/include/bx/inline/pixelformat.inl
+++ b/include/bx/inline/pixelformat.inl
@@ -21,9 +21,7 @@ namespace bx
 
 	inline int32_t toSnorm(float _value, float _scale)
 	{
-		return int32_t(round(
-					clamp(_value, -1.0f, 1.0f) * _scale)
-					);
+		return int32_t(round(clamp(_value, -1.0f, 1.0f) * _scale) );
 	}
 
 	inline float fromSnorm(int32_t _value, float _scale)
@@ -721,46 +719,49 @@ namespace bx
 		memCopy(_dst, _src, 8);
 	}
 
-	template<int32_t MantissaBits, int32_t ExpBits>
+	template<int32_t MantissaBitsT, int32_t ExpBitsT>
 	inline void encodeRgbE(float* _dst, const float* _src)
 	{
 		// Reference(s):
 		// - https://web.archive.org/web/20181126040035/https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
 		//
-		const int32_t expMax  = (1<<ExpBits) - 1;
-		const int32_t expBias = (1<<(ExpBits - 1) ) - 1;
+		const int32_t expMax  = (1<< ExpBitsT      ) - 1;
+		const int32_t expBias = (1<<(ExpBitsT - 1) ) - 1;
 		const float   sharedExpMax = float(expMax) / float(expMax + 1) * float(1 << (expMax - expBias) );
 
 		const float rr = clamp(_src[0], 0.0f, sharedExpMax);
 		const float gg = clamp(_src[1], 0.0f, sharedExpMax);
 		const float bb = clamp(_src[2], 0.0f, sharedExpMax);
 		const float mm = max(rr, gg, bb);
-		union { float ff; uint32_t ui; } cast = { mm };
-		int32_t expShared = int32_t(uint32_imax(uint32_t(-expBias-1), ( ( (cast.ui>>23) & 0xff) - 127) ) ) + 1 + expBias;
-		float denom = pow(2.0f, float(expShared - expBias - MantissaBits) );
+		const uint32_t mm_as_ui = bitCast<uint32_t>(mm);
 
-		if ( (1<<MantissaBits) == int32_t(round(mm/denom) ) )
+		// Signed max (like the uint32_imax it replaces): the unbiased exponent is negative for mm < 1.0.
+		int32_t expShared = max(-expBias-1, int32_t( (mm_as_ui>>23) & 0xff) - 127) + 1 + expBias;
+		float denom = pow(2.0f, float(expShared - expBias - MantissaBitsT) );
+		if ( (1<<MantissaBitsT) == int32_t(round(mm/denom) ) )
 		{
 			denom *= 2.0f;
 			++expShared;
 		}
 
-		const float invDenom = 1.0f/denom;
+		const float invDenom = rcpSafe(denom);
 		_dst[0] = round(rr * invDenom);
 		_dst[1] = round(gg * invDenom);
 		_dst[2] = round(bb * invDenom);
 		_dst[3] = float(expShared);
 	}
 
-	template<int32_t MantissaBits, int32_t ExpBits>
+	template<int32_t MantissaBitsT, int32_t ExpBitsT>
 	inline void decodeRgbE(float* _dst, const float* _src)
 	{
-		const int32_t expBias = (1<<(ExpBits - 1) ) - 1;
-		const float exponent  = _src[3]-float(expBias-MantissaBits);
+		// Shared-exponent decode: value = mantissa * 2^(sharedExp - expBias - MantissaBitsT).
+		const int32_t expBias = (1<<(ExpBitsT - 1) ) - 1;
+		const float exponent  = _src[3]-float(expBias+MantissaBitsT);
 		const float scale     = pow(2.0f, exponent);
+
 		_dst[0] = _src[0] * scale;
 		_dst[1] = _src[1] * scale;
 		_dst[2] = _src[2] * scale;
 	}
 
 	// RGB9E5F
@@ -779,12 +780,12 @@ namespace bx
 
 	inline void unpackRgb9E5F(float* _dst, const void* _src)
 	{
-		uint32_t packed = *( (const uint32_t*)_src);
+		const uint32_t packed = *( (const uint32_t*)_src);
 
 		float tmp[4];
-		tmp[0] = float( ( (packed    ) & 0x1ff) ) / 511.0f;
-		tmp[1] = float( ( (packed>> 9) & 0x1ff) ) / 511.0f;
-		tmp[2] = float( ( (packed>>18) & 0x1ff) ) / 511.0f;
+		tmp[0] = float( ( (packed    ) & 0x1ff) );
+		tmp[1] = float( ( (packed>> 9) & 0x1ff) );
+		tmp[2] = float( ( (packed>>18) & 0x1ff) );
 		tmp[3] = float( ( (packed>>27) &  0x1f) );
 
 		decodeRgbE<9, 5>(_dst, tmp);
diff --git a/include/bx/inline/uint32_t.inl b/include/bx/inline/uint32_t.inl
index 3daf8f752..34b92a19d 100644
--- a/include/bx/inline/uint32_t.inl
+++ b/include/bx/inline/uint32_t.inl
@@ -649,15 +649,15 @@ namespace bx
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC bool isAligned(Ty* _ptr, int32_t _align)
 	{
-		union { const void* ptr; uintptr_t addr; } un = { _ptr };
-		return isAligned(un.addr, _align);
+		const uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		return isAligned(addr, _align);
 	}
 
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC bool isAligned(const Ty* _ptr, int32_t _align)
 	{
-		union { const void* ptr; uintptr_t addr; } un = { _ptr };
-		return isAligned(un.addr, _align);
+		const uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		return isAligned(addr, _align);
 	}
 
 	template<typename Ty>
@@ -670,17 +670,17 @@ namespace bx
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC Ty* alignDown(Ty* _ptr, int32_t _align)
 	{
-		union { Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignDown(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignDown(addr, _align);
+		return bitCast<Ty*>(addr);
 	}
 
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC const Ty* alignDown(const Ty* _ptr, int32_t _align)
 	{
-		union { const Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignDown(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignDown(addr, _align);
+		return bitCast<const Ty*>(addr);
 	}
 
 	template<typename Ty>
@@ -693,23 +693,22 @@ namespace bx
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC Ty* alignUp(Ty* _ptr, int32_t _align)
 	{
-		union { Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignUp(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignUp(addr, _align);
+		return bitCast<Ty*>(addr);
 	}
 
 	template<typename Ty>
 	inline BX_CONSTEXPR_FUNC const Ty* alignUp(const Ty* _ptr, int32_t _align)
 	{
-		union { const Ty* ptr; uintptr_t addr; } un = { _ptr };
-		un.addr = alignUp(un.addr, _align);
-		return un.ptr;
+		uintptr_t addr = bitCast<uintptr_t>(_ptr);
+		addr = alignUp(addr, _align);
+		return bitCast<const Ty*>(addr);
 	}
 
 	inline BX_CONST_FUNC uint16_t halfFromFloat(float _a)
 	{
-		union { uint32_t ui; float flt; } ftou;
-		ftou.flt = _a;
+		const uint32_t a_as_ui = bitCast<uint32_t>(_a);
 
 		const uint32_t one                       = uint32_li(0x00000001);
 		const uint32_t f_s_mask                  = uint32_li(kFloatSignMask);
@@ -728,13 +727,13 @@ namespace bx
 		const uint32_t f_h_m_pos_offset          = uint32_li(0x0000000d);
 		const uint32_t h_nan_min                 = uint32_li(0x00007c01);
 		const uint32_t f_h_e_biased_flag         = uint32_li(0x0000008f);
-		const uint32_t f_s                       = uint32_and(ftou.ui, f_s_mask);
-		const uint32_t f_e                       = uint32_and(ftou.ui, f_e_mask);
+		const uint32_t f_s                       = uint32_and(a_as_ui, f_s_mask);
+		const uint32_t f_e                       = uint32_and(a_as_ui, f_e_mask);
 		const uint16_t h_s                       = (uint16_t)uint32_srl(f_s, f_h_s_pos_offset);
-		const uint32_t f_m                       = uint32_and(ftou.ui, f_m_mask);
+		const uint32_t f_m                       = uint32_and(a_as_ui, f_m_mask);
 		const uint16_t f_e_amount                = (uint16_t)uint32_srl(f_e, f_e_pos);
 		const uint32_t f_e_half_bias             = uint32_sub(f_e_amount, f_h_bias_offset);
-		const uint32_t f_snan                    = uint32_and(ftou.ui, f_snan_mask);
+		const uint32_t f_snan                    = uint32_and(a_as_ui, f_snan_mask);
 		const uint32_t f_m_round_mask            = uint32_and(f_m, f_m_round_bit);
 		const uint32_t f_m_round_offset          = uint32_sll(f_m_round_mask, one);
 		const uint32_t f_m_rounded               = uint32_add(f_m, f_m_round_offset);
@@ -770,7 +769,7 @@ namespace bx
 		const uint32_t h_em_snan_result          = uint32_sels(is_f_snan_msb, h_snan_mask, h_em_denorm_result);
 		const uint32_t h_result                  = uint32_or(h_s, h_em_snan_result);
 
-		return (uint16_t)(h_result);
+		return uint16_t(h_result);
 	}
 
 	inline BX_CONST_FUNC float halfToFloat(uint16_t _a)
@@ -817,9 +816,7 @@ namespace bx
 		const uint32_t f_nan_result         = uint32_sels(is_nan_msb, f_em_nan, f_inf_result);
 		const uint32_t f_result             = uint32_or(f_s, f_nan_result);
 
-		union { uint32_t ui; float flt; } utof;
-		utof.ui = f_result;
-		return utof.flt;
+		return bitCast<float>(f_result);
 	}
 
 } // namespace bx
diff --git a/src/dtoa.cpp b/src/dtoa.cpp
index e65c2ffba..47c057a5d 100644
--- a/src/dtoa.cpp
+++ b/src/dtoa.cpp
@@ -606,12 +606,6 @@ namespace bx
 #define DOUBLE_PLUS_INFINITY  UINT64_C(0x7ff0000000000000)
 #define DOUBLE_MINUS_INFINITY UINT64_C(0xfff0000000000000)
 
-	union HexDouble
-	{
-		double d;
-		uint64_t u;
-	};
-
 #define lsr96(s2, s1, s0, d2, d1, d0)      \
 	d0 = ( (s0) >> 1) | ( ( (s1) & 1) << 31); \
 	d1 = ( (s1) >> 1) | ( ( (s2) & 1) << 31); \
@@ -932,13 +926,12 @@ namespace bx
 	static double converter(PrepNumber* _pn)
 	{
 		int binexp = 92;
-		HexDouble hd;
 		uint32_t s2, s1, s0; /* 96-bit precision integer */
 		uint32_t q2, q1, q0; /* 96-bit precision integer */
 		uint32_t r2, r1, r0; /* 96-bit precision integer */
 		uint32_t mask28 = UINT32_C(0xf) << 28;
 
-		hd.u = 0;
+		uint64_t hdu = 0;
 
 		s0 = (uint32_t)(_pn->mantissa & UINT32_MAX);
 		s1 = (uint32_t)(_pn->mantissa >> 32);
@@ -1011,18 +1004,18 @@ namespace bx
 		{
 			if (_pn->negative)
 			{
-				hd.u = DOUBLE_MINUS_INFINITY;
+				hdu = DOUBLE_MINUS_INFINITY;
 			}
 			else
 			{
-				hd.u = DOUBLE_PLUS_INFINITY;
+				hdu = DOUBLE_PLUS_INFINITY;
 			}
 		}
 		else if (binexp < 1)
 		{
 			if (_pn->negative)
 			{
-				hd.u = DOUBLE_MINUS_ZERO;
+				hdu = DOUBLE_MINUS_ZERO;
 			}
 		}
 		else if (s2)
@@ -1039,10 +1032,10 @@ namespace bx
 				q |= (1ULL << 63);
 			}
 
-			hd.u = q;
+			hdu = q;
 		}
 
-		return hd.d;
+		return bitCast<double>(hdu);
 	}
 
 	int32_t toString(char* _out, int32_t _max, bool _value)
@@ -1074,9 +1067,6 @@ namespace bx
 		pn.negative = 0;
 		pn.exponent = 0;
 
-		HexDouble hd;
-		hd.u = DOUBLE_PLUS_ZERO;
-
 		switch (parser(_str.getPtr(), _str.getTerm(), &pn) )
 		{
 		case PARSER_OK:
@@ -1084,22 +1074,19 @@ namespace bx
 			break;
 
 		case PARSER_PZERO:
-			*_out = hd.d;
+			*_out = bitCast<double>(DOUBLE_PLUS_ZERO);
 			break;
 
 		case PARSER_MZERO:
-			hd.u = DOUBLE_MINUS_ZERO;
-			*_out = hd.d;
+			*_out = bitCast<double>(DOUBLE_MINUS_ZERO);
 			break;
 
 		case PARSER_PINF:
-			hd.u = DOUBLE_PLUS_INFINITY;
-			*_out = hd.d;
+			*_out = bitCast<double>(DOUBLE_PLUS_INFINITY);
 			break;
 
 		case PARSER_MINF:
-			hd.u = DOUBLE_MINUS_INFINITY;
-			*_out = hd.d;
+			*_out = bitCast<double>(DOUBLE_MINUS_INFINITY);
 			break;
 		}
 
diff --git a/tests/pixelformat_test.cpp b/tests/pixelformat_test.cpp
new file mode 100644
index 000000000..03119688a
--- /dev/null
+++ b/tests/pixelformat_test.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2010-2024 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx/blob/master/LICENSE
+ */
+
+#include "test.h"
+#include <bx/pixelformat.h>
+
+TEST_CASE("pack/unpack Rgba8", "[pixelformat]")
+{
+	float rgba[4] = { 0.1f, 0.3f, 0.8f, 0.9f };
+	uint32_t encoded;
+	bx::packRgba8(&encoded, rgba);
+
+	float decoded[4];
+	bx::unpackRgba8(decoded, &encoded);
+
+	REQUIRE(bx::isEqual(rgba, decoded, 4, 0.01f) );
+}
+
+TEST_CASE("pack/unpack Rgb9E5F", "[pixelformat]")
+{
+	float rgba[3] = { 0.1f, 0.3f, 0.89f };
+	uint32_t encoded;
+	bx::packRgb9E5F(&encoded, rgba);
+
+	float decoded[3];
+	bx::unpackRgb9E5F(decoded, &encoded);
+
+	REQUIRE(bx::isEqual(rgba, decoded, BX_COUNTOF(rgba), 0.001f) );
+}