github.com/cloudwego/dynamicgo@v0.2.6-0.20240519101509-707f41b6b834/native/fastbytes.c

github.com/cloudwego/dynamicgo@v0.2.6-0.20240519101509-707f41b6b834/native/fastbytes.c (about)

     1  /*
     2   * Copyright 2023 CloudWeGo Authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "native.h"
    18  
    /*
     * Alignment mask for the widest aligned vector load used by lspace():
     * 31 when the 32-byte AVX2 loop is compiled in, 15 for the 16-byte SSE loop.
     */
    #if USE_AVX2
    static const uintptr_t ALIGN_MASK = 31;
    #else
    static const uintptr_t ALIGN_MASK = 15;
    #endif

    /* Reports whether `c` is one of the four bytes lspace() skips over. */
    static inline int lspace_is_ws(char c)
    {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /*
     * Skips whitespace (' ', '\t', '\n', '\r') in a byte buffer.
     *
     *   sp  start of the buffer
     *   nb  length of the buffer in bytes
     *   p   offset, relative to `sp`, at which scanning starts
     *
     * Returns the offset (relative to `sp`) of the first byte at or after `p`
     * that is not whitespace, or `nb` when every byte in [p, nb) is whitespace.
     * When `p >= nb` there is nothing to scan and `nb` is returned without
     * touching the buffer.
     */
    size_t lspace(const char *sp, size_t nb, size_t p)
    {
        const char *ss = sp;

        /* nothing to scan; this also keeps `nb - p` below from wrapping around
         * (and the loops from reading out of bounds) when `p` is past the end */
        if (p >= nb)
        {
            return nb;
        }

        /* seek to `p` */
        sp += p;
        nb -= p;

        /* likely to run into non-spaces within a few characters, so try scalar
         * code first; this loop also stops once `sp` is aligned, which is what
         * makes the aligned vector loads below valid */
        while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK) != 0)
        {
            if (!lspace_is_ws(*sp))
            {
                return sp - ss;
            }

            sp++;
            nb--;
        }

    #if USE_AVX2
        /* 32-byte loop */
        while (likely(nb >= 32))
        {
            __m256i x = _mm256_load_si256((const void *)sp);
            __m256i a = _mm256_cmpeq_epi8(x, _mm256_set1_epi8(' '));
            __m256i b = _mm256_cmpeq_epi8(x, _mm256_set1_epi8('\t'));
            __m256i c = _mm256_cmpeq_epi8(x, _mm256_set1_epi8('\n'));
            __m256i d = _mm256_cmpeq_epi8(x, _mm256_set1_epi8('\r'));
            __m256i u = _mm256_or_si256(a, b);
            __m256i v = _mm256_or_si256(c, d);
            __m256i w = _mm256_or_si256(u, v);

            /* one bit per byte, set when that byte is whitespace */
            int32_t ms = _mm256_movemask_epi8(w);

            /* all 32 bits set (-1) means the whole block is whitespace;
             * otherwise the lowest clear bit marks the first non-space byte */
            if (ms != -1)
            {
                _mm256_zeroupper();
                return sp - ss + __builtin_ctzll(~(uint64_t)ms);
            }

            /* move to next block */
            sp += 32;
            nb -= 32;
        }

        /* clear upper half to avoid AVX-SSE transition penalty */
        _mm256_zeroupper();
    #endif

        /* 16-byte loop */
        while (likely(nb >= 16))
        {
            __m128i x = _mm_load_si128((const void *)sp);
            __m128i a = _mm_cmpeq_epi8(x, _mm_set1_epi8(' '));
            __m128i b = _mm_cmpeq_epi8(x, _mm_set1_epi8('\t'));
            __m128i c = _mm_cmpeq_epi8(x, _mm_set1_epi8('\n'));
            __m128i d = _mm_cmpeq_epi8(x, _mm_set1_epi8('\r'));
            __m128i u = _mm_or_si128(a, b);
            __m128i v = _mm_or_si128(c, d);
            __m128i w = _mm_or_si128(u, v);

            /* one bit per byte, set when that byte is whitespace */
            int32_t ms = _mm_movemask_epi8(w);

            /* 0xffff means all 16 bytes are whitespace; otherwise the lowest
             * clear bit marks the first non-space byte */
            if (ms != 0xffff)
            {
                return sp - ss + __builtin_ctz(~ms);
            }

            /* move to next block */
            sp += 16;
            nb -= 16;
        }

        /* remaining bytes, do with scalar code */
        while (nb > 0)
        {
            if (!lspace_is_ws(*sp))
            {
                return sp - ss;
            }

            sp++;
            nb--;
        }

        /* all the characters are spaces */
        return sp - ss;
    }