diff --git a/simde/simde-arch.h b/simde/simde-arch.h index 5240783c6..6b689c667 100644 --- a/simde/simde-arch.h +++ b/simde/simde-arch.h @@ -191,6 +191,14 @@ #define SIMDE_BUG_LCC_XOP_MISSING #define SIMDE_BUG_LCC_FMA_WRONG_RESULT #define SIMDE_BUG_LCC_AVX_NO_LOAD_STORE_U2 + +/* Some native functions on E2K with instruction set < v6 + are declared as deprecated due to inefficiency. + Still they are more efficient than SIMDe implementation. + So we're using them, and switching off these deprecation warnings. */ +#define SIMDE_BUG_PCLMUL_XOP_DEPRECATED +#define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +#define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") #endif /* HP/PA / PA-RISC @@ -317,7 +325,7 @@ # if defined(__GFNI__) # define SIMDE_ARCH_X86_GFNI 1 # endif -# if defined(__PCLMUL__) && !defined(SIMDE_ARCH_E2K) /* E2K has inefficient implementation of PCLMUL */ +# if defined(__PCLMUL__) # define SIMDE_ARCH_X86_PCLMUL 1 # endif # if defined(__VPCLMULQDQ__) diff --git a/simde/x86/clmul.h b/simde/x86/clmul.h index e2bf77f99..d6c6fe5e0 100644 --- a/simde/x86/clmul.h +++ b/simde/x86/clmul.h @@ -203,8 +203,21 @@ simde_mm_clmulepi64_si128 (simde__m128i a, simde__m128i b, const int imm8) return simde__m128i_from_private(r_); } + +#if defined(SIMDE_X86_PCLMUL_NATIVE) && defined(SIMDE_BUG_PCLMUL_XOP_DEPRECATED) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS + SIMDE_FUNCTION_ATTRIBUTES + simde__m128i + simde_undeprecated_mm_clmulepi64_si128 (simde__m128i a, simde__m128i b, const int imm8) { + return _mm_clmulepi64_si128(a, b, imm8); + } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#else + #define simde_undeprecated_mm_clmulepi64_si128 _mm_clmulepi64_si128 +#endif + #if defined(SIMDE_X86_PCLMUL_NATIVE) - #define simde_mm_clmulepi64_si128(a, b, imm8) _mm_clmulepi64_si128(a, b, imm8) + #define simde_mm_clmulepi64_si128(a, b, imm8) simde_undeprecated_mm_clmulepi64_si128(a, b, imm8) #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) #define simde_mm_clmulepi64_si128(a, b, imm8) \ simde__m128i_from_neon_u64( \ @@ -233,20 +246,20 @@ simde_mm256_clmulepi64_epi128 (simde__m256i a, simde__m256i b, const int imm8) #if defined(SIMDE_X86_PCLMUL_NATIVE) switch (imm8 & 0x11) { case 0x00: - r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x00); - r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x00); + r_.m128i[0] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x00); + r_.m128i[1] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x00); break; case 0x01: - r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x01); - r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x01); + r_.m128i[0] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x01); + r_.m128i[1] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x01); break; case 0x10: - r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x10); - r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x10); + r_.m128i[0] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x10); + r_.m128i[1] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x10); break; case 0x11: - r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x11); - r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x11); + r_.m128i[0] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x11); + r_.m128i[1] = simde_undeprecated_mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x11); break; } #else diff --git a/simde/x86/f16c.h b/simde/x86/f16c.h index ecbb748fd..27afddc2b 100644 --- a/simde/x86/f16c.h +++ b/simde/x86/f16c.h @@ -45,12 +45,14 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtps_ph(simde__m128 a, const int sae) { #if defined(SIMDE_X86_F16C_NATIVE) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS switch (sae & SIMDE_MM_FROUND_NO_EXC) { case SIMDE_MM_FROUND_NO_EXC: return _mm_cvtps_ph(a, SIMDE_MM_FROUND_NO_EXC); default: return _mm_cvtps_ph(a, 0); } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS #else simde__m128_private a_ = simde__m128_to_private(a); simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); @@ -102,12 +104,14 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_cvtps_ph(simde__m256 a, const int sae) { #if defined(SIMDE_X86_F16C_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS switch (sae & SIMDE_MM_FROUND_NO_EXC) { case SIMDE_MM_FROUND_NO_EXC: return _mm256_cvtps_ph(a, SIMDE_MM_FROUND_NO_EXC); default: return _mm256_cvtps_ph(a, 0); } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS #else simde__m256_private a_ = simde__m256_to_private(a); simde__m128i_private r_; diff --git a/simde/x86/xop.h b/simde/x86/xop.h index 6337e2a42..94e0eae9a 100644 --- a/simde/x86/xop.h +++ b/simde/x86/xop.h @@ -3511,8 +3511,21 @@ simde_mm_permute2_ps (simde__m128 a, simde__m128 b, simde__m128i c, const int im return simde__m128_from_private(r_); } + +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_BUG_PCLMUL_XOP_DEPRECATED) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_undeprecated_mm_permute2_ps (simde__m128 a, simde__m128 b, simde__m128i c, const int imm8) { + return _mm_permute2_ps(a, b, c, imm8); + } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#else + #define simde_undeprecated_mm_permute2_ps _mm_permute2_ps +#endif + #if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm_permute2_ps(a, b, c, imm8) _mm_permute2_ps((a), (b), (c), (imm8)) + #define simde_mm_permute2_ps(a, b, c, imm8) simde_undeprecated_mm_permute2_ps((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_permute2_ps(a, b, c, imm8) simde_mm_permute2_ps((a), (b), (c), (imm8)) @@ -3547,8 +3560,21 @@ simde_mm_permute2_pd (simde__m128d a, simde__m128d b, simde__m128i c, const int return simde__m128d_from_private(r_); } + +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_BUG_PCLMUL_XOP_DEPRECATED) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_undeprecated_mm_permute2_pd (simde__m128d a, simde__m128d b, simde__m128i c, const int imm8) { + return _mm_permute2_pd(a, b, c, imm8); + } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#else + #define simde_undeprecated_mm_permute2_pd _mm_permute2_pd +#endif + #if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm_permute2_pd(a, b, c, imm8) _mm_permute2_pd((a), (b), (c), (imm8)) + #define simde_mm_permute2_pd(a, b, c, imm8) simde_undeprecated_mm_permute2_pd((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_permute2_pd(a, b, c, imm8) simde_mm_permute2_pd((a), (b), (c), (imm8)) @@ -3589,8 +3615,21 @@ simde_mm256_permute2_ps (simde__m256 a, simde__m256 b, simde__m256i c, const int return simde__m256_from_private(r_); } + +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_BUG_PCLMUL_XOP_DEPRECATED) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS + SIMDE_FUNCTION_ATTRIBUTES + simde__m256 + simde_undeprecated_mm256_permute2_ps (simde__m256 a, simde__m256 b, simde__m256i c, const int imm8) { + return _mm256_permute2_ps(a, b, c, imm8); + } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#else + #define simde_undeprecated_mm256_permute2_ps _mm256_permute2_ps +#endif + #if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm256_permute2_ps(a, b, c, imm8) _mm256_permute2_ps((a), (b), (c), (imm8)) + #define simde_mm256_permute2_ps(a, b, c, imm8) simde_undeprecated_mm256_permute2_ps((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm256_permute2_ps(a, b, c, imm8) simde_mm256_permute2_ps((a), (b), (c), (imm8)) @@ -3631,8 +3670,21 @@ simde_mm256_permute2_pd (simde__m256d a, simde__m256d b, simde__m256i c, const i return simde__m256d_from_private(r_); } + +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_BUG_PCLMUL_XOP_DEPRECATED) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS + SIMDE_FUNCTION_ATTRIBUTES + simde__m256d + simde_undeprecated_mm256_permute2_pd (simde__m256d a, simde__m256d b, simde__m256i c, const int imm8) { + return _mm256_permute2_pd(a, b, c, imm8); + } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#else + #define simde_undeprecated_mm256_permute2_pd _mm256_permute2_pd +#endif + #if defined(SIMDE_X86_XOP_NATIVE) - #define simde_mm256_permute2_pd(a, b, c, imm8) _mm256_permute2_pd((a), (b), (c), (imm8)) + #define simde_mm256_permute2_pd(a, b, c, imm8) simde_undeprecated_mm256_permute2_pd((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm256_permute2_pd(a, b, c, imm8) simde_mm256_permute2_pd((a), (b), (c), (imm8))