github.com/cloudwego/dynamicgo@v0.2.6-0.20240519101509-707f41b6b834/native/fastbytes.c (about) 1 /* 2 * Copyright 2023 CloudWeGo Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "native.h" 18 19 #if USE_AVX2 20 static const uintptr_t ALIGN_MASK = 31; 21 #else 22 static const uintptr_t ALIGN_MASK = 15; 23 #endif 24 25 size_t lspace(const char *sp, size_t nb, size_t p) 26 { 27 int32_t ms; 28 const char *ss = sp; 29 30 /* seek to `p` */ 31 sp += p; 32 nb -= p; 33 34 /* likely to run into non-spaces within a few characters, try scalar code first */ 35 while (nb > 0 && ((uintptr_t)sp & ALIGN_MASK)) 36 { 37 switch ((nb--, *sp++)) 38 { 39 case ' ': 40 break; 41 case '\r': 42 break; 43 case '\n': 44 break; 45 case '\t': 46 break; 47 default: 48 return sp - ss - 1; 49 } 50 } 51 52 #if USE_AVX2 53 /* 32-byte loop */ 54 while (likely(nb >= 32)) 55 { 56 __m256i x = _mm256_load_si256((const void *)sp); 57 __m256i a = _mm256_cmpeq_epi8(x, _mm256_set1_epi8(' ')); 58 __m256i b = _mm256_cmpeq_epi8(x, _mm256_set1_epi8('\t')); 59 __m256i c = _mm256_cmpeq_epi8(x, _mm256_set1_epi8('\n')); 60 __m256i d = _mm256_cmpeq_epi8(x, _mm256_set1_epi8('\r')); 61 __m256i u = _mm256_or_si256(a, b); 62 __m256i v = _mm256_or_si256(c, d); 63 __m256i w = _mm256_or_si256(u, v); 64 65 /* check for matches */ 66 if ((ms = _mm256_movemask_epi8(w)) != -1) 67 { 68 _mm256_zeroupper(); 69 return sp - ss + __builtin_ctzll(~(uint64_t)ms); 70 } 71 72 /* move to next block */ 73 sp += 32; 74 nb -= 32; 75 } 76 77 /* clear upper half to avoid AVX-SSE transition penalty */ 78 _mm256_zeroupper(); 79 #endif 80 81 /* 16-byte loop */ 82 while (likely(nb >= 16)) 83 { 84 __m128i x = _mm_load_si128((const void *)sp); 85 __m128i a = _mm_cmpeq_epi8(x, _mm_set1_epi8(' ')); 86 __m128i b = _mm_cmpeq_epi8(x, _mm_set1_epi8('\t')); 87 __m128i c = _mm_cmpeq_epi8(x, _mm_set1_epi8('\n')); 88 __m128i d = _mm_cmpeq_epi8(x, _mm_set1_epi8('\r')); 89 __m128i u = _mm_or_si128(a, b); 90 __m128i v = _mm_or_si128(c, d); 91 __m128i w = _mm_or_si128(u, v); 92 93 /* check for matches */ 94 if ((ms = _mm_movemask_epi8(w)) != 0xffff) 95 { 96 return sp - ss + __builtin_ctz(~ms); 97 } 98 99 /* move to next block */ 100 sp += 16; 101 nb -= 16; 102 } 103 104 /* remaining bytes, do with scalar code */ 105 while (nb-- > 0) 106 { 107 switch (*sp++) 108 { 109 case ' ': 110 break; 111 case '\r': 112 break; 113 case '\n': 114 break; 115 case '\t': 116 break; 117 default: 118 return sp - ss - 1; 119 } 120 } 121 122 /* all the characters are spaces */ 123 return sp - ss; 124 }