gitee.com/quant1x/gox@v1.7.6/num/asm/_cpp/boolean.cpp (about)

     1  #include <cstddef>
     2  #include <x86intrin.h>
     3  
     4  void Not_V(bool* x, size_t n) {
     5      for (size_t i = 0; i < n; i++) {
     6          x[i] = !x[i];
     7      }
     8  }
     9  
    10  void And_V(bool* __restrict x, bool* __restrict y, size_t n) {
    11      for (size_t i = 0; i < n; i++) {
    12          x[i] = x[i] & y[i];
    13      }
    14  }
    15  
    16  void Or_V(bool* __restrict x, bool* __restrict y, size_t n) {
    17      for (size_t i = 0; i < n; i++) {
    18          x[i] = x[i] | y[i];
    19      }
    20  }
    21  
    22  void Xor_V(bool* __restrict x, bool* __restrict y, size_t n) {
    23      for (size_t i = 0; i < n; i++) {
    24          x[i] = x[i] != y[i];
    25      }
    26  }
    27  
    28  template<typename T>
    29  size_t Select(T* __restrict dst, T* __restrict x, bool* __restrict y, size_t n) { // not vectorized
    30      size_t cnt = 0;
    31      for (size_t i = 0; i < n; i++) {
    32          if (y[i]) {
    33              dst[cnt++] = x[i];
    34          }
    35      }
    36      return cnt;
    37  }
    38  
    39  size_t Select_F64_I(double* dst, double* x, bool* y, size_t n) {
    40      return Select(dst, x, y, n);
    41  }
    42  
    43  size_t Select_F32_I(float* dst, float* x, bool* y, size_t n) {
    44      return Select(dst, x, y, n);
    45  }
    46  
    47  bool All_I(bool* __restrict x, size_t n) {
    48      __m256i zeros = _mm256_setzero_si256();
    49  
    50      size_t i = 0;
    51      for (; i < (n & size_t(-32)); i += 32) {
    52          __m256i y = _mm256_loadu_si256((__m256i_u*)&x[i]);
    53          __m256i m = _mm256_cmpeq_epi8(y, zeros);
    54          if (!_mm256_testz_si256(m, m)) {
    55              return false;
    56          }
    57      }
    58      for (; i < n; i++) {
    59          if (!x[i]) {
    60              return false;
    61          }
    62      }
    63  
    64      return true;
    65  }
    66  
    67  
    68  bool Any_I(bool* __restrict x, size_t n) {
    69      size_t i = 0;
    70      for (; i < (n & size_t(-32)); i += 32) {
    71          __m256i y = _mm256_loadu_si256((__m256i_u*)&x[i]);
    72          if (!_mm256_testz_si256(y, y)) {
    73              return true;
    74          }
    75      }
    76      for (; i < n; i++) {
    77          if (x[i]) {
    78              return true;
    79          }
    80      }
    81  
    82      return false;
    83  }
    84  
    85  bool None_I(bool* __restrict x, size_t n) {
    86      size_t i = 0;
    87      for (; i < (n & size_t(-32)); i += 32) {
    88          __m256i y = _mm256_loadu_si256((__m256i_u*)&x[i]);
    89          if (!_mm256_testz_si256(y, y)) {
    90              return false;
    91          }
    92      }
    93      for (; i < n; i++) {
    94          if (x[i]) {
    95              return false;
    96          }
    97      }
    98  
    99      return true;
   100  }
   101  
   102  size_t Count_I(bool* x, size_t n) {
   103      size_t cnt = 0;
   104      for (size_t i = 0; i < n; i++) {
   105          cnt += x[i];
   106      }
   107      return cnt;
   108  }