Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a whole bunch more x86 vendor intrinsics #1380

Merged
merged 11 commits into from
Jun 23, 2023

Conversation

bjorn3
Copy link
Member

@bjorn3 bjorn3 commented Jun 23, 2023

Ignoring AVX512 I'm now down to 190 failing tests for stdarch out of 725.

Helps with #1289 and #1222

@bjorn3
Copy link
Member Author

bjorn3 commented Jun 23, 2023

missing intrinsics
    core_arch::x86::aes::tests::test_mm_aesdec_si128
    core_arch::x86::aes::tests::test_mm_aesdeclast_si128
    core_arch::x86::aes::tests::test_mm_aesenc_si128
    core_arch::x86::aes::tests::test_mm_aesenclast_si128
    core_arch::x86::aes::tests::test_mm_aesimc_si128
    core_arch::x86::aes::tests::test_mm_aeskeygenassist_si128
    core_arch::x86::fxsr::tests::fxsave
    core_arch::x86::pclmulqdq::tests::test_mm_clmulepi64_si128
    core_arch::x86::rdtsc::tests::_rdtsc
    core_arch::x86::rdtsc::tests::_rdtscp
    core_arch::x86::sse2::tests::test_mm_avg_epu16
    core_arch::x86::sse2::tests::test_mm_avg_epu8
    core_arch::x86::sse2::tests::test_mm_clflush
    core_arch::x86::sse2::tests::test_mm_cmpeq_sd
    core_arch::x86::sse2::tests::test_mm_cmpge_sd
    core_arch::x86::sse2::tests::test_mm_cmpgt_sd
    core_arch::x86::sse2::tests::test_mm_cmple_sd
    core_arch::x86::sse2::tests::test_mm_cmplt_sd
    core_arch::x86::sse2::tests::test_mm_cmpneq_sd
    core_arch::x86::sse2::tests::test_mm_cmpnge_sd
    core_arch::x86::sse2::tests::test_mm_cmpngt_sd
    core_arch::x86::sse2::tests::test_mm_cmpnle_sd
    core_arch::x86::sse2::tests::test_mm_cmpnlt_sd
    core_arch::x86::sse2::tests::test_mm_cmpord_sd
    core_arch::x86::sse2::tests::test_mm_cmpunord_sd
    core_arch::x86::sse2::tests::test_mm_comieq_sd
    core_arch::x86::sse2::tests::test_mm_comige_sd
    core_arch::x86::sse2::tests::test_mm_comigt_sd
    core_arch::x86::sse2::tests::test_mm_comile_sd
    core_arch::x86::sse2::tests::test_mm_comilt_sd
    core_arch::x86::sse2::tests::test_mm_comineq_sd
    core_arch::x86::sse2::tests::test_mm_cvtepi32_ps
    core_arch::x86::sse2::tests::test_mm_cvtpd_epi32
    core_arch::x86::sse2::tests::test_mm_cvtpd_ps
    core_arch::x86::sse2::tests::test_mm_cvtps_epi32
    core_arch::x86::sse2::tests::test_mm_cvtps_pd
    core_arch::x86::sse2::tests::test_mm_cvtsd_si32
    core_arch::x86::sse2::tests::test_mm_cvtsd_ss
    core_arch::x86::sse2::tests::test_mm_cvtss_sd
    core_arch::x86::sse2::tests::test_mm_cvttpd_epi32
    core_arch::x86::sse2::tests::test_mm_cvttps_epi32
    core_arch::x86::sse2::tests::test_mm_cvttsd_si32
    core_arch::x86::sse2::tests::test_mm_lfence
    core_arch::x86::sse2::tests::test_mm_madd_epi16
    core_arch::x86::sse2::tests::test_mm_maskmoveu_si128
    core_arch::x86::sse2::tests::test_mm_max_pd
    core_arch::x86::sse2::tests::test_mm_max_sd
    core_arch::x86::sse2::tests::test_mm_mfence
    core_arch::x86::sse2::tests::test_mm_min_pd
    core_arch::x86::sse2::tests::test_mm_min_sd
    core_arch::x86::sse2::tests::test_mm_mul_epu32
    core_arch::x86::sse2::tests::test_mm_mulhi_epi16
    core_arch::x86::sse2::tests::test_mm_mulhi_epu16
    core_arch::x86::sse2::tests::test_mm_packs_epi16
    core_arch::x86::sse2::tests::test_mm_packs_epi32
    core_arch::x86::sse2::tests::test_mm_packus_epi16
    core_arch::x86::sse2::tests::test_mm_sad_epu8
    core_arch::x86::sse2::tests::test_mm_sll_epi16
    core_arch::x86::sse2::tests::test_mm_sll_epi32
    core_arch::x86::sse2::tests::test_mm_sll_epi64
    core_arch::x86::sse2::tests::test_mm_sqrt_sd
    core_arch::x86::sse2::tests::test_mm_sra_epi16
    core_arch::x86::sse2::tests::test_mm_sra_epi32
    core_arch::x86::sse2::tests::test_mm_srl_epi16
    core_arch::x86::sse2::tests::test_mm_srl_epi32
    core_arch::x86::sse2::tests::test_mm_srl_epi64
    core_arch::x86::sse2::tests::test_mm_ucomieq_sd
    core_arch::x86::sse2::tests::test_mm_ucomige_sd
    core_arch::x86::sse2::tests::test_mm_ucomigt_sd
    core_arch::x86::sse2::tests::test_mm_ucomile_sd
    core_arch::x86::sse2::tests::test_mm_ucomilt_sd
    core_arch::x86::sse2::tests::test_mm_ucomineq_sd
    core_arch::x86::sse3::tests::test_mm_addsub_pd
    core_arch::x86::sse3::tests::test_mm_addsub_ps
    core_arch::x86::sse3::tests::test_mm_hadd_pd
    core_arch::x86::sse3::tests::test_mm_hadd_ps
    core_arch::x86::sse3::tests::test_mm_hsub_pd
    core_arch::x86::sse3::tests::test_mm_hsub_ps
    core_arch::x86::sse3::tests::test_mm_lddqu_si128
    core_arch::x86::sse41::tests::test_mm_blend_epi16
    core_arch::x86::sse41::tests::test_mm_blend_pd
    core_arch::x86::sse41::tests::test_mm_blend_ps
    core_arch::x86::sse41::tests::test_mm_blendv_epi8
    core_arch::x86::sse41::tests::test_mm_blendv_pd
    core_arch::x86::sse41::tests::test_mm_blendv_ps
    core_arch::x86::sse41::tests::test_mm_ceil_sd
    core_arch::x86::sse41::tests::test_mm_ceil_ss
    core_arch::x86::sse41::tests::test_mm_dp_pd
    core_arch::x86::sse41::tests::test_mm_dp_ps
    core_arch::x86::sse41::tests::test_mm_floor_sd
    core_arch::x86::sse41::tests::test_mm_floor_ss
    core_arch::x86::sse41::tests::test_mm_insert_ps
    core_arch::x86::sse41::tests::test_mm_minpos_epu16
    core_arch::x86::sse41::tests::test_mm_minpos_epu16_1
    core_arch::x86::sse41::tests::test_mm_minpos_epu16_2
    core_arch::x86::sse41::tests::test_mm_mpsadbw_epu8
    core_arch::x86::sse41::tests::test_mm_mul_epi32
    core_arch::x86::sse41::tests::test_mm_packus_epi32
    core_arch::x86::sse41::tests::test_mm_round_pd
    core_arch::x86::sse41::tests::test_mm_round_ps
    core_arch::x86::sse41::tests::test_mm_round_sd
    core_arch::x86::sse41::tests::test_mm_round_ss
    core_arch::x86::sse41::tests::test_mm_test_all_ones
    core_arch::x86::sse41::tests::test_mm_test_all_zeros
    core_arch::x86::sse41::tests::test_mm_test_mix_ones_zeros
    core_arch::x86::sse41::tests::test_mm_testc_si128
    core_arch::x86::sse41::tests::test_mm_testnzc_si128
    core_arch::x86::sse41::tests::test_mm_testz_si128
    core_arch::x86::sse42::tests::test_mm_cmpestra
    core_arch::x86::sse42::tests::test_mm_cmpestrc
    core_arch::x86::sse42::tests::test_mm_cmpestri
    core_arch::x86::sse42::tests::test_mm_cmpestrm
    core_arch::x86::sse42::tests::test_mm_cmpestro
    core_arch::x86::sse42::tests::test_mm_cmpestrs
    core_arch::x86::sse42::tests::test_mm_cmpestrz
    core_arch::x86::sse42::tests::test_mm_cmpistra
    core_arch::x86::sse42::tests::test_mm_cmpistrc
    core_arch::x86::sse42::tests::test_mm_cmpistri
    core_arch::x86::sse42::tests::test_mm_cmpistrm
    core_arch::x86::sse42::tests::test_mm_cmpistro
    core_arch::x86::sse42::tests::test_mm_cmpistrs
    core_arch::x86::sse42::tests::test_mm_cmpistrz
    core_arch::x86::sse42::tests::test_mm_crc32_u16
    core_arch::x86::sse42::tests::test_mm_crc32_u32
    core_arch::x86::sse42::tests::test_mm_crc32_u8
    core_arch::x86::sse::tests::test_mm_add_ss
    core_arch::x86::sse::tests::test_mm_cmpeq_ss
    core_arch::x86::sse::tests::test_mm_cmpge_ss
    core_arch::x86::sse::tests::test_mm_cmpgt_ss
    core_arch::x86::sse::tests::test_mm_cmple_ss
    core_arch::x86::sse::tests::test_mm_cmplt_ss
    core_arch::x86::sse::tests::test_mm_cmpneq_ss
    core_arch::x86::sse::tests::test_mm_cmpnge_ss
    core_arch::x86::sse::tests::test_mm_cmpngt_ss
    core_arch::x86::sse::tests::test_mm_cmpnle_ss
    core_arch::x86::sse::tests::test_mm_cmpnlt_ss
    core_arch::x86::sse::tests::test_mm_cmpord_ss
    core_arch::x86::sse::tests::test_mm_cmpunord_ss
    core_arch::x86::sse::tests::test_mm_comieq_ss
    core_arch::x86::sse::tests::test_mm_comieq_ss_vs_ucomieq_ss
    core_arch::x86::sse::tests::test_mm_comigt_ss
    core_arch::x86::sse::tests::test_mm_comile_ss
    core_arch::x86::sse::tests::test_mm_comilt_ss
    core_arch::x86::sse::tests::test_mm_comineq_ss
    core_arch::x86::sse::tests::test_mm_cvtsi32_ss
    core_arch::x86::sse::tests::test_mm_cvtss_si32
    core_arch::x86::sse::tests::test_mm_cvttss_si32
    core_arch::x86::sse::tests::test_mm_div_ss
    core_arch::x86::sse::tests::test_mm_getcsr_setcsr_1
    core_arch::x86::sse::tests::test_mm_getcsr_setcsr_2
    core_arch::x86::sse::tests::test_mm_getcsr_setcsr_underflow
    core_arch::x86::sse::tests::test_mm_max_ps
    core_arch::x86::sse::tests::test_mm_max_ss
    core_arch::x86::sse::tests::test_mm_min_ps
    core_arch::x86::sse::tests::test_mm_min_ss
    core_arch::x86::sse::tests::test_mm_mul_ss
    core_arch::x86::sse::tests::test_mm_rcp_ps
    core_arch::x86::sse::tests::test_mm_rcp_ss
    core_arch::x86::sse::tests::test_mm_rsqrt_ps
    core_arch::x86::sse::tests::test_mm_rsqrt_ss
    core_arch::x86::sse::tests::test_mm_sfence
    core_arch::x86::sse::tests::test_mm_sqrt_ps
    core_arch::x86::sse::tests::test_mm_sqrt_ss
    core_arch::x86::sse::tests::test_mm_sub_ss
    core_arch::x86::sse::tests::test_mm_ucomieq_ss
    core_arch::x86::sse::tests::test_mm_ucomige_ss
    core_arch::x86::sse::tests::test_mm_ucomigt_ss
    core_arch::x86::sse::tests::test_mm_ucomile_ss
    core_arch::x86::sse::tests::test_mm_ucomilt_ss
    core_arch::x86::sse::tests::test_mm_ucomineq_ss
    core_arch::x86::ssse3::tests::test_mm_hadd_epi16
    core_arch::x86::ssse3::tests::test_mm_hadd_epi32
    core_arch::x86::ssse3::tests::test_mm_hadds_epi16
    core_arch::x86::ssse3::tests::test_mm_hsub_epi16
    core_arch::x86::ssse3::tests::test_mm_hsub_epi32
    core_arch::x86::ssse3::tests::test_mm_hsubs_epi16
    core_arch::x86::ssse3::tests::test_mm_maddubs_epi16
    core_arch::x86::ssse3::tests::test_mm_mulhrs_epi16
    core_arch::x86::ssse3::tests::test_mm_sign_epi16
    core_arch::x86::ssse3::tests::test_mm_sign_epi32
    core_arch::x86::ssse3::tests::test_mm_sign_epi8
    core_arch::x86_64::fxsr::tests::fxsave64
    core_arch::x86_64::sse2::tests::test_mm_cvtsd_si64
    core_arch::x86_64::sse2::tests::test_mm_cvtsd_si64x
    core_arch::x86_64::sse2::tests::test_mm_cvttsd_si64
    core_arch::x86_64::sse2::tests::test_mm_cvttsd_si64x
    core_arch::x86_64::sse42::tests::test_mm_crc32_u64
    core_arch::x86_64::sse::tests::test_mm_cvtsi64_ss
    core_arch::x86_64::sse::tests::test_mm_cvtss_si64
    core_arch::x86_64::sse::tests::test_mm_cvttss_si64

test result: FAILED. 725 passed; 190 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.15s
stdarch patch
diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs
index 99e37c91..ee666063 100644
--- a/crates/assert-instr-macro/src/lib.rs
+++ b/crates/assert-instr-macro/src/lib.rs
@@ -181,7 +181,7 @@ pub fn assert_instr(
 
     let tokens: TokenStream = quote! {
         #item
-        #tokens
+        //#tokens
     };
     tokens.into()
 }
diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index fafee5c0..097a17f9 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -738,7 +738,7 @@ pub const _CMP_TRUE_US: i32 = 0x1f;
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmppd(a, b, IMM5 as i8)
+    vcmppd(a, b, const { IMM5 as i8 })
 }
 
 /// Compares packed double-precision (64-bit) floating-point
@@ -768,7 +768,7 @@ pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpps(a, b, IMM5 as i8)
+    vcmpps(a, b, const { IMM5 as i8 })
 }
 
 /// Compares packed single-precision (32-bit) floating-point
@@ -783,7 +783,7 @@ pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM5, 5);
-    vcmpps256(a, b, IMM5 as u8)
+    vcmpps256(a, b, const { IMM5 as u8 })
 }
 
 /// Compares the lower double-precision (64-bit) floating-point element in
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index ee8b7e75..f013a9ea 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -1,6 +1,6 @@
 //! `x86` and `x86_64` intrinsics.
 
-use crate::{intrinsics, marker::Sized, mem::transmute};
+use crate::{marker::Sized, mem::transmute};
 
 #[macro_use]
 mod macros;
@@ -807,6 +807,7 @@ pub use self::adx::*;
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
+/*
 mod avx512f;
 pub use self::avx512f::*;
 
@@ -854,3 +855,4 @@ pub use self::f16c::*;
 
 mod avx512bf16;
 pub use self::avx512bf16::*;
+*/
diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs
index ec429803..20cb774f 100644
--- a/crates/core_arch/src/x86/test.rs
+++ b/crates/core_arch/src/x86/test.rs
@@ -124,7 +124,7 @@ mod x86_polyfill {
     pub use crate::core_arch::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
 }
 pub use self::x86_polyfill::*;
-
+/*
 pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
     assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b))
 }
@@ -142,3 +142,4 @@ pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) {
         panic!("{:?} != {:?}", a, b);
     }
 }
+*/
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index 461874ec..468dbc45 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -36,8 +36,10 @@ pub use self::bmi2::*;
 mod avx2;
 pub use self::avx2::*;
 
+/*
 mod avx512f;
 pub use self::avx512f::*;
+*/
 
 mod bswap;
 pub use self::bswap::*;
patch to disable fake cpuid
diff --git a/src/inline_asm.rs b/src/inline_asm.rs
index 3ba530c0..ea3c2f8e 100644
--- a/src/inline_asm.rs
+++ b/src/inline_asm.rs
@@ -51,7 +51,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         }
 
         // Used by stdarch
-        if template[0] == InlineAsmTemplatePiece::String("mov ".to_string())
+        /*if template[0] == InlineAsmTemplatePiece::String("mov ".to_string())
             && matches!(
                 template[1],
                 InlineAsmTemplatePiece::Placeholder {
@@ -129,7 +129,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
             let destination_block = fx.get_block(destination.unwrap());
             fx.bcx.ins().jump(destination_block, &[]);
             return;
-        }
+        }*/
 
         // Used by compiler-builtins
         if fx.tcx.symbol_name(fx.instance).name.starts_with("___chkstk") {

@bjorn3 bjorn3 merged commit e44f47a into master Jun 23, 2023
@bjorn3 bjorn3 deleted the more_vendor_intrinsics branch June 23, 2023 12:17
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant