// Source: gitee.com/quant1x/gox@v1.7.6/num/asm/_cpp/find.cpp
#ifdef __cplusplus
#include <cstddef>
#endif
#include <stddef.h>  // the code below uses the unqualified, global size_t
#include <x86intrin.h>

/**
 * Find the first element of x[0..n) that compares equal to a.
 *
 * Equality is IEEE-754 floating-point equality (the same relation as the
 * scalar `==` operator): +0.0 and -0.0 compare equal, and NaN never compares
 * equal to anything, including itself. The vector loop and the scalar tail
 * use the same relation, so the result does not depend on where an element
 * falls relative to the 8-element vector boundary.
 *
 * @param x  pointer to at least n readable doubles (no alignment required)
 * @param a  value to search for
 * @param n  number of elements in x
 * @return   index of the first matching element, or n if there is none
 */
__attribute__((target("avx")))  // AVX codegen for this function even without -mavx
size_t Find_F64(double* x, double a, size_t n) {
    const __m256d needle = _mm256_set1_pd(a);
    const size_t vec_end = n & ~(size_t)7;  // largest multiple of 8 that is <= n

    size_t i = 0;
    for (; i < vec_end; i += 8) {
        const __m256d lo = _mm256_loadu_pd(&x[i]);
        const __m256d hi = _mm256_loadu_pd(&x[i + 4]);
        // _CMP_EQ_OQ: ordered, quiet equality -- false whenever a NaN is involved.
        const int lo_bits = _mm256_movemask_pd(_mm256_cmp_pd(needle, lo, _CMP_EQ_OQ));
        const int hi_bits = _mm256_movemask_pd(_mm256_cmp_pd(needle, hi, _CMP_EQ_OQ));
        // Bit k of mask is set when x[i + k] matched (k = 0..7).
        const unsigned mask = ((unsigned)hi_bits << 4) | (unsigned)lo_bits;
        if (mask != 0) {
            // Lowest set bit == first matching lane.
            return i + (size_t)__builtin_ctz(mask);
        }
    }
    // Scalar tail for the remaining (n % 8) elements.
    for (; i < n; i++) {
        if (x[i] == a) {
            return i;
        }
    }

    return i;  // i == n here: not found
}

/**
 * Find the first element of x[0..n) that compares equal to a.
 *
 * Equality is IEEE-754 floating-point equality (the same relation as the
 * scalar `==` operator): +0.0f and -0.0f compare equal, and NaN never
 * compares equal to anything, including itself. The vector loop and the
 * scalar tail use the same relation, so the result does not depend on where
 * an element falls relative to the 16-element vector boundary.
 *
 * @param x  pointer to at least n readable floats (no alignment required)
 * @param a  value to search for
 * @param n  number of elements in x
 * @return   index of the first matching element, or n if there is none
 */
__attribute__((target("avx")))  // AVX codegen for this function even without -mavx
size_t Find_F32(float* x, float a, size_t n) {
    const __m256 needle = _mm256_set1_ps(a);
    const size_t vec_end = n & ~(size_t)15;  // largest multiple of 16 that is <= n

    size_t i = 0;
    for (; i < vec_end; i += 16) {
        const __m256 lo = _mm256_loadu_ps(&x[i]);
        const __m256 hi = _mm256_loadu_ps(&x[i + 8]);
        // _CMP_EQ_OQ: ordered, quiet equality -- false whenever a NaN is involved.
        const int lo_bits = _mm256_movemask_ps(_mm256_cmp_ps(needle, lo, _CMP_EQ_OQ));
        const int hi_bits = _mm256_movemask_ps(_mm256_cmp_ps(needle, hi, _CMP_EQ_OQ));
        // Bit k of mask is set when x[i + k] matched (k = 0..15).
        const unsigned mask = ((unsigned)hi_bits << 8) | (unsigned)lo_bits;
        if (mask != 0) {
            // Lowest set bit == first matching lane.
            return i + (size_t)__builtin_ctz(mask);
        }
    }
    // Scalar tail for the remaining (n % 16) elements.
    for (; i < n; i++) {
        if (x[i] == a) {
            return i;
        }
    }

    return i;  // i == n here: not found
}