We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
// sse2 version
/// Returns true when the first `n` elements of `a` and `b` are all equal.
///
/// When the compiler targets SSE2 (it then predefines `__SSE2__`) and `Char` is a
/// one-byte type, the input is compared 16 bytes at a time with unaligned loads;
/// any remaining elements are compared one by one.
///
/// @param a  first buffer, at least `n` elements long
/// @param b  second buffer, at least `n` elements long
/// @param n  number of elements to compare (0 yields true)
template <typename Char>
inline bool bytescompare(const Char* a, const Char* b, size_t n)
{
    size_t offset = 0;
#ifdef __SSE2__
    // `offset` counts elements, while each load covers 16 bytes. The two only line up
    // for one-byte elements, so wider element types skip the vector loop entirely and
    // are handled by the scalar loop below.
    const size_t offset_end = (sizeof(Char) == 1) ? n / 16 * 16 : 0;
    for (; offset < offset_end; offset += 16)
    {
        const __m128i vec_1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(a + offset));
        const __m128i vec_2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
        const __m128i compare_result = _mm_cmpeq_epi8(vec_1, vec_2);
        // One mask bit per byte lane: all 16 bits set means the whole chunk matched.
        if (_mm_movemask_epi8(compare_result) != 0xFFFF) return false;
    }
#endif
    // Scalar tail (or the whole input when the vector loop was not used).
    for (; offset < n; ++offset)
    {
        if (a[offset] != b[offset]) return false;
    }
    return true;
}
// avx version
/// Returns true when the first `n` elements of `a` and `b` are all equal.
///
/// When the compiler targets AVX2 (it then predefines `__AVX2__`, e.g. with
/// `-mavx2`) and `Char` is a one-byte type, the input is compared 32 bytes at a
/// time with unaligned loads; any remaining elements are compared one by one.
///
/// @param a  first buffer, at least `n` elements long
/// @param b  second buffer, at least `n` elements long
/// @param n  number of elements to compare (0 yields true)
template <typename Char>
inline bool bytescompare_avx(const Char* a, const Char* b, size_t n)
{
    size_t offset = 0;
#ifdef __AVX2__
    // `offset` counts elements, while each load covers 32 bytes. The two only line up
    // for one-byte elements, so wider element types skip the vector loop entirely and
    // are handled by the scalar loop below.
    const size_t offset_end = (sizeof(Char) == 1) ? n / 32 * 32 : 0;
    for (; offset < offset_end; offset += 32)
    {
        const __m256i vec_1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
        const __m256i vec_2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
        const __m256i compare_result = _mm256_cmpeq_epi8(vec_1, vec_2);
        // One mask bit per byte lane: all 32 bits set reads as -1 in the returned int,
        // which avoids comparing a signed value against an unsigned literal.
        if (_mm256_movemask_epi8(compare_result) != -1) return false;
    }
#endif
    // Scalar tail (or the whole input when the vector loop was not used).
    for (; offset < n; ++offset)
    {
        if (a[offset] != b[offset]) return false;
    }
    return true;
}
其中测试用例如下: void test_for_bytescompare() { std::srand(static_cast(std::time(nullptr))); // 初始化随机数生成器 const size_t arraySize = 320000; // 设置数组大小 // 生成两个随机字节数组 char array1[arraySize]; char array2[arraySize]; generateRandomByteArray(array1, arraySize); std::copy(array1, array1 + arraySize, array2); auto start = chrono::high_resolution_clock::now(); bool res = NBSimdBooster::bytescompare_avx(array1, array2, arraySize); auto end = chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; cout << "time: " << elapsed_seconds.count() << endl; cout << res << endl; } 其中 编译命令:g++ -O2 test.cpp -o pj1 -mavx2 sse版本耗时:1.4581e-05s avx版本耗时:2.7021e-05s
The text was updated successfully, but these errors were encountered:
你的cpu型号是不是比较老?
Sorry, something went wrong.
No branches or pull requests
// sse2 version
/// Returns true when the first `n` elements of `a` and `b` are all equal.
///
/// When the compiler targets SSE2 (it then predefines `__SSE2__`) and `Char` is a
/// one-byte type, the input is compared 16 bytes at a time with unaligned loads;
/// any remaining elements are compared one by one.
///
/// @param a  first buffer, at least `n` elements long
/// @param b  second buffer, at least `n` elements long
/// @param n  number of elements to compare (0 yields true)
template <typename Char>
inline bool bytescompare(const Char* a, const Char* b, size_t n)
{
    size_t offset = 0;
#ifdef __SSE2__
    // `offset` counts elements, while each load covers 16 bytes. The two only line up
    // for one-byte elements, so wider element types skip the vector loop entirely and
    // are handled by the scalar loop below.
    const size_t offset_end = (sizeof(Char) == 1) ? n / 16 * 16 : 0;
    for (; offset < offset_end; offset += 16)
    {
        const __m128i vec_1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(a + offset));
        const __m128i vec_2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
        const __m128i compare_result = _mm_cmpeq_epi8(vec_1, vec_2);
        // One mask bit per byte lane: all 16 bits set means the whole chunk matched.
        if (_mm_movemask_epi8(compare_result) != 0xFFFF) return false;
    }
#endif
    // Scalar tail (or the whole input when the vector loop was not used).
    for (; offset < n; ++offset)
    {
        if (a[offset] != b[offset]) return false;
    }
    return true;
}
// avx version
/// Returns true when the first `n` elements of `a` and `b` are all equal.
///
/// When the compiler targets AVX2 (it then predefines `__AVX2__`, e.g. with
/// `-mavx2`) and `Char` is a one-byte type, the input is compared 32 bytes at a
/// time with unaligned loads; any remaining elements are compared one by one.
///
/// @param a  first buffer, at least `n` elements long
/// @param b  second buffer, at least `n` elements long
/// @param n  number of elements to compare (0 yields true)
template <typename Char>
inline bool bytescompare_avx(const Char* a, const Char* b, size_t n)
{
    size_t offset = 0;
#ifdef __AVX2__
    // `offset` counts elements, while each load covers 32 bytes. The two only line up
    // for one-byte elements, so wider element types skip the vector loop entirely and
    // are handled by the scalar loop below.
    const size_t offset_end = (sizeof(Char) == 1) ? n / 32 * 32 : 0;
    for (; offset < offset_end; offset += 32)
    {
        const __m256i vec_1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
        const __m256i vec_2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
        const __m256i compare_result = _mm256_cmpeq_epi8(vec_1, vec_2);
        // One mask bit per byte lane: all 32 bits set reads as -1 in the returned int,
        // which avoids comparing a signed value against an unsigned literal.
        if (_mm256_movemask_epi8(compare_result) != -1) return false;
    }
#endif
    // Scalar tail (or the whole input when the vector loop was not used).
    for (; offset < n; ++offset)
    {
        if (a[offset] != b[offset]) return false;
    }
    return true;
}
其中测试用例如下:
/// Times one call of NBSimdBooster::bytescompare_avx on two identical random
/// buffers and prints the elapsed time in seconds followed by the result.
void test_for_bytescompare()
{
    std::srand(static_cast<unsigned>(std::time(nullptr))); // seed the random number generator
    const size_t arraySize = 320000; // number of bytes in each buffer
    // Fill one buffer with random bytes and copy it into the other.
    // Static storage keeps the two 320000-byte buffers off the stack.
    static char array1[arraySize];
    static char array2[arraySize];
    generateRandomByteArray(array1, arraySize);
    std::copy(array1, array1 + arraySize, array2);

    // Untimed warm-up call so one-time costs are not charged to the measurement.
    // NOTE(review): some CPUs reportedly run 256-bit instructions at reduced
    // throughput for a short period after first use, which could explain an AVX
    // figure that looks slower than SSE on a single cold run; confirm on the target CPU.
    volatile bool warmup = NBSimdBooster::bytescompare_avx(array1, array2, arraySize);
    (void)warmup;

    const auto start = std::chrono::high_resolution_clock::now();
    const bool res = NBSimdBooster::bytescompare_avx(array1, array2, arraySize);
    const auto end = std::chrono::high_resolution_clock::now();
    const std::chrono::duration<double> elapsed_seconds = end - start;
    std::cout << "time: " << elapsed_seconds.count() << std::endl;
    std::cout << res << std::endl;
}
其中
编译命令:g++ -O2 test.cpp -o pj1 -mavx2
sse版本耗时:1.4581e-05s
avx版本耗时:2.7021e-05s
The text was updated successfully, but these errors were encountered: