github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/scanning.h (about) 1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include "native.h" 20 #include "utf8.h" 21 #include "utils.h" 22 #include "parsing.h" 23 #include "lspace.h" 24 #include "atof_native.h" 25 #include "atof_eisel_lemire.h" 26 27 static always_inline long skip_number_1(const GoString *src, long *p); 28 static always_inline void vnumber_1(const GoString *src, long *p, JsonState *ret); 29 static always_inline long skip_string_1(const GoString *src, long *p, uint64_t flags); 30 static always_inline long skip_positive_1(const GoString *src, long *p); 31 static always_inline long skip_negative_1(const GoString *src, long *p); 32 33 static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa; 34 static const uint64_t EVEN_MASK = 0x5555555555555555; 35 36 // NOTE: mask referenced from decoder/decoder.go 37 static const uint64_t MASK_VALIDATE_STRING = 1ull << 5; 38 static const uint64_t MASK_ALLOW_CONTROL = 1ull << 31; 39 40 static const double P10_TAB[23] = { 41 /* <= the connvertion to double is not exact when less than 1 => */ 1e-000, 42 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 43 1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 44 1e+021, 1e+022 /* <= the connvertion to double is not exact when larger, => */ 45 }; 46 47 static always_inline 
uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo) { 48 uint32_t v; 49 uint32_t c = __builtin_uadd_overflow((uint32_t)v1, (uint32_t)v2, &v); 50 51 /* set the carry */ 52 *vo = c; 53 return v; 54 } 55 56 static always_inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) { 57 unsigned long long v; 58 uint64_t c = __builtin_uaddll_overflow(v1, v2, &v); 59 60 /* set the carry */ 61 *vo = c; 62 return v; 63 } 64 65 static always_inline char isspace(char ch) { 66 return ch == ' ' || ch == '\r' || ch == '\n' | ch == '\t'; 67 } 68 69 const int MASK_USE_NUMBER = 1<<1; 70 71 static always_inline void vdigits(const GoString *src, long *p, JsonState *ret, uint64_t flag) { 72 --*p; 73 if (flag & MASK_USE_NUMBER) { 74 long i = skip_number_1(src, p); 75 if (i < 0) { 76 ret->vt = i; 77 return; 78 } 79 ret->vt = V_DOUBLE; 80 ret->ep = i; 81 return; 82 } 83 vnumber_1(src, p, ret); 84 } 85 86 static always_inline char advance_ns(const GoString *src, long *p) { 87 size_t vi = *p; 88 size_t nb = src->len; 89 const char * sp = src->buf; 90 91 /* it's likely to run into non-spaces within a few 92 * characters, so test up to 4 characters manually */ 93 if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++; 94 if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++; 95 if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++; 96 if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++; 97 98 /* check EOF */ 99 if (vi >= nb) { 100 *p = vi; 101 return 0; 102 } 103 104 /* too many spaces, use SIMD to search for characters */ 105 if ((vi = lspace_1(sp, nb, vi)) >= nb) { 106 return 0; 107 } 108 109 nospace: 110 *p = vi + 1; 111 return src->buf[vi]; 112 } 113 114 static always_inline int64_t advance_dword(const GoString *src, long *p, long dec, int64_t ret, uint32_t val) { 115 if (*p > src->len + dec - 4) { 116 *p = src->len; 117 return -ERR_EOF; 118 } else if (*(uint32_t *)(src->buf + *p - dec) == val) { 119 *p += 4 - dec; 120 return ret; 121 } else { 122 *p -= dec; 123 for 
(int i = 0; src->buf[*p] == (val & 0xff) && i < 4; i++, ++*p) { val >>= 8; } 124 return -ERR_INVAL; 125 } 126 } 127 128 static always_inline ssize_t advance_string_default(const GoString *src, long p, int64_t *ep) { 129 char ch; 130 uint64_t es; 131 uint64_t fe; 132 uint64_t os; 133 uint64_t m0; 134 uint64_t m1; 135 uint64_t cr = 0; 136 137 /* prevent out-of-bounds accessing */ 138 if (unlikely(src->len == p)) { 139 return -ERR_EOF; 140 } 141 142 /* buffer pointers */ 143 size_t nb = src->len; 144 const char * sp = src->buf; 145 const char * ss = src->buf; 146 147 #define ep_init() *ep = -1; 148 #define ep_setc() ep_setx(sp - ss - 1) 149 #define ep_setx(x) if (*ep == -1) { *ep = (x); } 150 151 /* seek to `p` */ 152 nb -= p; 153 sp += p; 154 ep_init() 155 156 #if USE_AVX2 157 /* initialize vectors */ 158 __m256i v0; 159 __m256i v1; 160 __m256i q0; 161 __m256i q1; 162 __m256i x0; 163 __m256i x1; 164 __m256i cq = _mm256_set1_epi8('"'); 165 __m256i cx = _mm256_set1_epi8('\\'); 166 167 /* partial masks */ 168 uint32_t s0; 169 uint32_t s1; 170 uint32_t t0; 171 uint32_t t1; 172 #else 173 /* initialize vectors */ 174 __m128i v0; 175 __m128i v1; 176 __m128i v2; 177 __m128i v3; 178 __m128i q0; 179 __m128i q1; 180 __m128i q2; 181 __m128i q3; 182 __m128i x0; 183 __m128i x1; 184 __m128i x2; 185 __m128i x3; 186 __m128i cq = _mm_set1_epi8('"'); 187 __m128i cx = _mm_set1_epi8('\\'); 188 189 /* partial masks */ 190 uint32_t s0; 191 uint32_t s1; 192 uint32_t s2; 193 uint32_t s3; 194 uint32_t t0; 195 uint32_t t1; 196 uint32_t t2; 197 uint32_t t3; 198 #endif 199 200 #define m0_mask(add) \ 201 m1 &= ~cr; \ 202 fe = (m1 << 1) | cr; \ 203 os = (m1 & ~fe) & ODD_MASK; \ 204 es = add(os, m1, &cr) << 1; \ 205 m0 &= ~(fe & (es ^ EVEN_MASK)); 206 207 /* 64-byte SIMD loop */ 208 while (likely(nb >= 64)) { 209 #if USE_AVX2 210 v0 = _mm256_loadu_si256 ((const void *)(sp + 0)); 211 v1 = _mm256_loadu_si256 ((const void *)(sp + 32)); 212 q0 = _mm256_cmpeq_epi8 (v0, cq); 213 q1 = _mm256_cmpeq_epi8 
(v1, cq); 214 x0 = _mm256_cmpeq_epi8 (v0, cx); 215 x1 = _mm256_cmpeq_epi8 (v1, cx); 216 s0 = _mm256_movemask_epi8 (q0); 217 s1 = _mm256_movemask_epi8 (q1); 218 t0 = _mm256_movemask_epi8 (x0); 219 t1 = _mm256_movemask_epi8 (x1); 220 m0 = ((uint64_t)s1 << 32) | (uint64_t)s0; 221 m1 = ((uint64_t)t1 << 32) | (uint64_t)t0; 222 #else 223 v0 = _mm_loadu_si128 ((const void *)(sp + 0)); 224 v1 = _mm_loadu_si128 ((const void *)(sp + 16)); 225 v2 = _mm_loadu_si128 ((const void *)(sp + 32)); 226 v3 = _mm_loadu_si128 ((const void *)(sp + 48)); 227 q0 = _mm_cmpeq_epi8 (v0, cq); 228 q1 = _mm_cmpeq_epi8 (v1, cq); 229 q2 = _mm_cmpeq_epi8 (v2, cq); 230 q3 = _mm_cmpeq_epi8 (v3, cq); 231 x0 = _mm_cmpeq_epi8 (v0, cx); 232 x1 = _mm_cmpeq_epi8 (v1, cx); 233 x2 = _mm_cmpeq_epi8 (v2, cx); 234 x3 = _mm_cmpeq_epi8 (v3, cx); 235 s0 = _mm_movemask_epi8 (q0); 236 s1 = _mm_movemask_epi8 (q1); 237 s2 = _mm_movemask_epi8 (q2); 238 s3 = _mm_movemask_epi8 (q3); 239 t0 = _mm_movemask_epi8 (x0); 240 t1 = _mm_movemask_epi8 (x1); 241 t2 = _mm_movemask_epi8 (x2); 242 t3 = _mm_movemask_epi8 (x3); 243 m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0; 244 m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0; 245 #endif 246 /** update first quote position */ 247 if (unlikely(m1 != 0)) { 248 ep_setx(sp - ss + __builtin_ctzll(m1)) 249 } 250 251 /** mask all the escaped quotes */ 252 if (unlikely(m1 != 0 || cr != 0)) { 253 m0_mask(add64) 254 } 255 256 /* check for end quote */ 257 if (m0 != 0) { 258 return sp - ss + __builtin_ctzll(m0) + 1; 259 } 260 261 /* move to the next block */ 262 sp += 64; 263 nb -= 64; 264 } 265 266 /* 32-byte SIMD round */ 267 if (likely(nb >= 32)) { 268 #if USE_AVX2 269 v0 = _mm256_loadu_si256 ((const void *)sp); 270 q0 = _mm256_cmpeq_epi8 (v0, cq); 271 x0 = _mm256_cmpeq_epi8 (v0, cx); 272 s0 = _mm256_movemask_epi8 (q0); 273 t0 = _mm256_movemask_epi8 (x0); 274 m0 = (uint64_t)s0; 275 m1 = (uint64_t)t0; 276 #else 
277 v0 = _mm_loadu_si128 ((const void *)(sp + 0)); 278 v1 = _mm_loadu_si128 ((const void *)(sp + 16)); 279 q0 = _mm_cmpeq_epi8 (v0, cq); 280 q1 = _mm_cmpeq_epi8 (v1, cq); 281 x0 = _mm_cmpeq_epi8 (v0, cx); 282 x1 = _mm_cmpeq_epi8 (v1, cx); 283 s0 = _mm_movemask_epi8 (q0); 284 s1 = _mm_movemask_epi8 (q1); 285 t0 = _mm_movemask_epi8 (x0); 286 t1 = _mm_movemask_epi8 (x1); 287 m0 = ((uint64_t)s1 << 16) | (uint64_t)s0; 288 m1 = ((uint64_t)t1 << 16) | (uint64_t)t0; 289 #endif 290 291 /** update first quote position */ 292 if (unlikely(m1 != 0)) { 293 ep_setx(sp - ss + __builtin_ctzll(m1)) 294 } 295 296 /** mask all the escaped quotes */ 297 if (unlikely(m1 != 0 || cr != 0)) { 298 m0_mask(add32) 299 } 300 301 /* check for end quote */ 302 if (m0 != 0) { 303 return sp - ss + __builtin_ctzll(m0) + 1; 304 } 305 306 /* move to the next block */ 307 sp += 32; 308 nb -= 32; 309 } 310 311 /* check for carry */ 312 if (unlikely(cr != 0)) { 313 if (nb == 0) { 314 return -ERR_EOF; 315 } else { 316 ep_setc() 317 sp++, nb--; 318 } 319 } 320 321 /* handle the remaining bytes with scalar code */ 322 while (nb-- > 0 && (ch = *sp++) != '"') { 323 if (unlikely(ch == '\\')) { 324 if (nb == 0) { 325 return -ERR_EOF; 326 } else { 327 ep_setc() 328 sp++, nb--; 329 } 330 } 331 } 332 333 #undef ep_init 334 #undef ep_setc 335 #undef ep_setx 336 #undef m0_mask 337 338 /* check for quotes */ 339 if (ch == '"') { 340 return sp - ss; 341 } else { 342 return -ERR_EOF; 343 } 344 } 345 346 #if USE_AVX2 347 348 static always_inline int _mm256_get_mask(__m256i v, __m256i t) { 349 return _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, t)); 350 } 351 352 // contrl char: 0x00 ~ 0x1F 353 static always_inline int _mm256_cchars_mask(__m256i v) { 354 __m256i e1 = _mm256_cmpgt_epi8 (v, _mm256_set1_epi8(-1)); 355 __m256i e2 = _mm256_cmpgt_epi8 (v, _mm256_set1_epi8(31)); 356 return _mm256_movemask_epi8 (_mm256_andnot_si256 (e2, e1)); 357 } 358 359 // ascii: 0x00 ~ 0x7F 360 static always_inline int 
_mm256_nonascii_mask(__m256i v) { 361 return _mm256_movemask_epi8(v); 362 } 363 364 #endif 365 366 static always_inline int _mm_get_mask(__m128i v, __m128i t) { 367 return _mm_movemask_epi8(_mm_cmpeq_epi8(v, t)); 368 } 369 370 // contrl char: 0x00 ~ 0x1F 371 static always_inline int _mm_cchars_mask(__m128i v) { 372 __m128i e1 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(-1)); 373 __m128i e2 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(31)); 374 return _mm_movemask_epi8 (_mm_andnot_si128 (e2, e1)); 375 } 376 377 // ascii: 0x00 ~ 0x7F 378 static always_inline int _mm_nonascii_mask(__m128i v) { 379 return _mm_movemask_epi8(v); 380 } 381 382 static always_inline ssize_t advance_string_validate(const GoString *src, long p, int64_t *ep) { 383 char ch; 384 uint64_t m0, m1, m2; 385 uint64_t es, fe, os; 386 uint64_t cr = 0; 387 long qp = 0; 388 long np = 0; 389 390 /* buffer pointers */ 391 size_t nb = src->len; 392 const char * sp = src->buf; 393 const char * ss = src->buf; 394 395 /* prevent out-of-bounds accessing */ 396 if (unlikely(nb == p)) { 397 return -ERR_EOF; 398 } 399 400 #define ep_init() *ep = -1; 401 #define ep_setc() ep_setx(sp - ss - 1) 402 #define ep_setx(x) if (*ep == -1) { *ep = (x); } 403 #define ep_seterr(x) *ep = (x); 404 405 /* seek to `p` */ 406 nb -= p; 407 sp += p; 408 ep_init() 409 410 #if USE_AVX2 411 /* initialize vectors */ 412 __m256i v0; 413 __m256i v1; 414 __m256i cq = _mm256_set1_epi8('"'); 415 __m256i cx = _mm256_set1_epi8('\\'); 416 417 /* partial masks */ 418 uint32_t s0, s1; 419 uint32_t t0, t1; 420 uint32_t c0, c1; 421 #else 422 /* initialize vectors */ 423 __m128i v0; 424 __m128i v1; 425 __m128i v2; 426 __m128i v3; 427 __m128i cq = _mm_set1_epi8('"'); 428 __m128i cx = _mm_set1_epi8('\\'); 429 430 /* partial masks */ 431 uint32_t s0, s1, s2, s3; 432 uint32_t t0, t1, t2, t3; 433 uint32_t c0, c1, c2, c3; 434 #endif 435 436 #define m0_mask(add) \ 437 m1 &= ~cr; \ 438 fe = (m1 << 1) | cr; \ 439 os = (m1 & ~fe) & ODD_MASK; \ 440 es = add(os, m1, &cr) << 1; \ 
441 m0 &= ~(fe & (es ^ EVEN_MASK)); 442 443 /* 64-byte SIMD loop */ 444 while (likely(nb >= 64)) { 445 #if USE_AVX2 446 v0 = _mm256_loadu_si256 ((const void *)(sp + 0)); 447 v1 = _mm256_loadu_si256 ((const void *)(sp + 32)); 448 s0 = _mm256_get_mask(v0, cq); 449 s1 = _mm256_get_mask(v1, cq); 450 t0 = _mm256_get_mask(v0, cx); 451 t1 = _mm256_get_mask(v1, cx); 452 c0 = _mm256_cchars_mask(v0); 453 c1 = _mm256_cchars_mask(v1); 454 m0 = ((uint64_t)s1 << 32) | (uint64_t)s0; 455 m1 = ((uint64_t)t1 << 32) | (uint64_t)t0; 456 m2 = ((uint64_t)c1 << 32) | (uint64_t)c0; 457 #else 458 v0 = _mm_loadu_si128 ((const void *)(sp + 0)); 459 v1 = _mm_loadu_si128 ((const void *)(sp + 16)); 460 v2 = _mm_loadu_si128 ((const void *)(sp + 32)); 461 v3 = _mm_loadu_si128 ((const void *)(sp + 48)); 462 s0 = _mm_get_mask(v0, cq); 463 s1 = _mm_get_mask(v1, cq); 464 s2 = _mm_get_mask(v2, cq); 465 s3 = _mm_get_mask(v3, cq); 466 t0 = _mm_get_mask(v0, cx); 467 t1 = _mm_get_mask(v1, cx); 468 t2 = _mm_get_mask(v2, cx); 469 t3 = _mm_get_mask(v3, cx); 470 c0 = _mm_cchars_mask(v0); 471 c1 = _mm_cchars_mask(v1); 472 c2 = _mm_cchars_mask(v2); 473 c3 = _mm_cchars_mask(v3); 474 m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0; 475 m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0; 476 m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0; 477 478 #endif 479 480 /** update first quote position */ 481 if (unlikely(m1 != 0)) { 482 ep_setx(sp - ss + __builtin_ctzll(m1)) 483 } 484 485 /** mask all the escaped quotes */ 486 if (unlikely(m1 != 0 || cr != 0)) { 487 m0_mask(add64) 488 } 489 490 qp = m0 ? __builtin_ctzll(m0) : 64; 491 np = m2 ? 
__builtin_ctzll(m2) : 64; 492 493 /* get the position of end quote */ 494 if (m0 != 0) { 495 /* check control chars in JSON string */ 496 if (unlikely(np < qp)) { 497 ep_seterr(sp - ss + np) 498 499 return -ERR_INVAL; 500 } 501 return sp - ss + qp + 1; 502 } 503 504 /* check control chars in JSON string */ 505 if (unlikely(m2 != 0)) { 506 ep_setx(sp - ss + np) 507 508 return -ERR_INVAL; 509 } 510 511 /* move to the next block */ 512 sp += 64; 513 nb -= 64; 514 } 515 516 /* 32-byte SIMD round */ 517 if (likely(nb >= 32)) { 518 #if USE_AVX2 519 v0 = _mm256_loadu_si256 ((const void *)sp); 520 s0 = _mm256_get_mask (v0, cq); 521 t0 = _mm256_get_mask (v0, cx); 522 c0 = _mm256_cchars_mask(v0); 523 m0 = (uint64_t)s0; 524 m1 = (uint64_t)t0; 525 m2 = (uint64_t)c0; 526 #else 527 v0 = _mm_loadu_si128 ((const void *)(sp + 0)); 528 v1 = _mm_loadu_si128 ((const void *)(sp + 16)); 529 s0 = _mm_get_mask(v0, cq); 530 s1 = _mm_get_mask(v1, cq); 531 t0 = _mm_get_mask(v0, cx); 532 t1 = _mm_get_mask(v1, cx); 533 c0 = _mm_cchars_mask(v0); 534 c1 = _mm_cchars_mask(v1); 535 m0 = ((uint64_t)s1 << 16) | (uint64_t)s0; 536 m1 = ((uint64_t)t1 << 16) | (uint64_t)t0; 537 m2 = ((uint64_t)c1 << 16) | (uint64_t)c0; 538 #endif 539 540 /** update first quote position */ 541 if (unlikely(m1 != 0)) { 542 ep_setx(sp - ss + __builtin_ctzll(m1)) 543 } 544 545 /** mask all the escaped quotes */ 546 if (unlikely(m1 != 0 || cr != 0)) { 547 m0_mask(add32) 548 } 549 550 qp = m0 ? __builtin_ctzll(m0) : 64; 551 np = m2 ? 
__builtin_ctzll(m2) : 64; 552 553 /* get the position of end quote */ 554 if (m0 != 0) { 555 if (unlikely(np < qp)) { 556 ep_seterr(sp - ss + np) 557 return -ERR_INVAL; 558 } 559 return sp - ss + qp + 1; 560 } 561 562 /* check control chars in JSON string */ 563 if (unlikely(m2 != 0)) { 564 ep_seterr(sp - ss + __builtin_ctzll(m2)) 565 return -ERR_INVAL; 566 } 567 568 /* move to the next block */ 569 sp += 32; 570 nb -= 32; 571 } 572 573 /* check for carry */ 574 if (unlikely(cr != 0)) { 575 if (nb == 0) { 576 return -ERR_EOF; 577 } else { 578 ep_setc() 579 sp++, nb--; 580 } 581 } 582 583 /* handle the remaining bytes with scalar code */ 584 while (nb > 0) { 585 ch = *sp; 586 if (ch == '"') { 587 588 return sp - ss + 1; 589 } 590 591 /* valid the escaped chars */ 592 if (unlikely(ch == '\\')) { 593 if (nb == 1) { 594 return -ERR_EOF; 595 } 596 ep_setx(sp - ss) 597 sp += 2, nb -= 2; 598 continue; 599 } 600 601 /* valid unescaped chars */ 602 if (unlikely( ch >= 0 && ch <= 0x1f)) { // control chars 603 ep_seterr(sp - ss) 604 return -ERR_INVAL; 605 } 606 607 sp++, nb--; 608 } 609 return -ERR_EOF; 610 #undef ep_init 611 #undef ep_setc 612 #undef ep_setx 613 #undef ep_seterr 614 #undef m0_mask 615 } 616 617 static always_inline ssize_t advance_string(const GoString *src, long p, int64_t *ep, uint64_t flags) { 618 if ((flags & MASK_VALIDATE_STRING) != 0) { 619 return advance_string_validate(src, p, ep); 620 } else { 621 return advance_string_default(src, p, ep); 622 } 623 } 624 625 #define set_vt(t) \ 626 ret->vt = t; 627 628 #define init_ret(t) \ 629 ret->vt = t; \ 630 ret->dv = 0.0; \ 631 ret->iv = 0; \ 632 ret->ep = *p; 633 634 #define check_eof() \ 635 if (i >= n) { \ 636 *p = n; \ 637 ret->vt = -ERR_EOF; \ 638 return; \ 639 } 640 641 #define check_sign(on_neg) \ 642 if (s[i] == '-') { \ 643 i++; \ 644 on_neg; \ 645 check_eof() \ 646 } 647 648 #define check_digit() \ 649 if (s[i] < '0' || s[i] > '9') { \ 650 *p = i; \ 651 ret->vt = -ERR_INVAL; \ 652 return; \ 653 } 
654 655 #define check_leading_zero() \ 656 if (s[i] == '0' && (i >= n || (s[i + 1] != '.' && s[i + 1] != 'e' && s[i + 1] != 'E'))) { \ 657 *p = ++i; \ 658 return; \ 659 } 660 661 #define parse_sign(sgn) \ 662 if (s[i] == '+' || s[i] == '-') { \ 663 sgn = s[i++] == '+' ? 1 : -1; \ 664 check_eof() \ 665 } 666 667 #define is_digit(val) \ 668 '0' <= val && val <= '9' 669 670 #define add_integer_to_mantissa(man, man_nd, exp10, dig) \ 671 if (man_nd < 19) { \ 672 man = man * 10 + dig; \ 673 man_nd++; \ 674 } else { \ 675 exp10++; \ 676 } 677 678 #define add_float_to_mantissa(man, man_nd, exp10, dig) \ 679 man = man * 10 + dig; \ 680 man_nd++; \ 681 exp10--; 682 683 #define parse_float_digits(val, sgn, ...) \ 684 while (i < n && s[i] >= '0' && s[i] <= '9' __VA_ARGS__) { \ 685 val *= 10; \ 686 val += sgn * (s[i++] - '0'); \ 687 } 688 689 #define parse_integer_digits(val, sgn, ovf) \ 690 while (i < n && s[i] >= '0' && s[i] <= '9') { \ 691 if (add_digit_overflow(val, sgn * (s[i++] - '0'))) { \ 692 ovf = 1; \ 693 break; \ 694 } \ 695 } 696 697 #define add_digit_overflow(val, chr) ( \ 698 __builtin_mul_overflow(val, 10, &val) || \ 699 __builtin_add_overflow(val, chr, &val) \ 700 ) 701 702 #define vinteger(type, sgn, on_neg) \ 703 int ovf = 0; \ 704 type val = 0; \ 705 \ 706 /* initial buffer pointers */ \ 707 long i = *p; \ 708 size_t n = src->len; \ 709 const char * s = src->buf; \ 710 \ 711 /* initialize the result, and check for '-' */ \ 712 init_ret(V_INTEGER) \ 713 check_eof() \ 714 check_sign(on_neg) \ 715 \ 716 /* check for leading zero or any digits */ \ 717 check_digit() \ 718 check_leading_zero() \ 719 parse_integer_digits(val, sgn, ovf) \ 720 \ 721 /* check for overflow */ \ 722 if (ovf) { \ 723 *p = i - 1; \ 724 ret->vt = -ERR_OVERFLOW; \ 725 return; \ 726 } \ 727 \ 728 /* check for the decimal part */ \ 729 if (i < n && s[i] == '.') { \ 730 *p = i; \ 731 ret->vt = -ERR_NUMBER_FMT; \ 732 return; \ 733 } \ 734 \ 735 /* check for the exponent part */ \ 736 if (i < n 
&& (s[i] == 'e' || s[i] == 'E')) { \ 737 *p = i; \ 738 ret->vt = -ERR_NUMBER_FMT; \ 739 return; \ 740 } \ 741 \ 742 /* update the result */ \ 743 *p = i; \ 744 ret->iv = val; 745 746 /** check whether float can represent the val exactly **/ 747 static always_inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val) { 748 *val = (double)man; 749 750 if (man >> 52 != 0) { 751 return false; 752 } 753 754 /* equal to if (sgn == -1) { *val *= -1; } */ 755 *(uint64_t *)val |= ((uint64_t)(sgn) >> 63 << 63); 756 757 if (exp == 0 || man == 0) { 758 return true; 759 } else if (exp > 0 && exp <= 15+22) { 760 /* uint64 integers: accurate range <= 10^15 * 761 * Powers of 10: accurate range <= 10^22, as P10_TAB * 762 * Example: man 1, exp 36, is ok */ 763 if (exp > 22) { 764 *val *= P10_TAB[exp-22]; 765 exp = 22; 766 } 767 768 /* f is not accurate when too larger */ 769 if (*val > 1e15 || *val < -1e15) { 770 return false; 771 } 772 773 *val *= P10_TAB[exp]; 774 return true; 775 } else if (exp < 0 && exp >= -22) { 776 *val /= P10_TAB[-exp]; 777 return true; 778 } 779 780 return false; 781 } 782 783 static always_inline double atof_fast(uint64_t man, int exp, int sgn, int trunc, double *val) { 784 double val_up = 0.0; 785 786 /* look-up for fast atof if the conversion can be exactly */ 787 if (is_atof_exact(man, exp, sgn, val)) { 788 return true; 789 } 790 791 /* A fast atof algorithm for high percison */ 792 if (atof_eisel_lemire64_1(man, exp, sgn, val)) { 793 if (!trunc || (atof_eisel_lemire64_1(man+1, exp, sgn, &val_up) && val_up == *val)) { 794 return true; 795 } 796 } 797 798 return false; 799 } 800 801 static bool always_inline is_overflow(uint64_t man, int sgn, int exp10) { 802 /* the former exp10 != 0 means man has overflowed 803 * the later euqals to man*sgn < INT64_MIN or > INT64_MAX */ 804 return exp10 != 0 || 805 ((man >> 63) == 1 && ((uint64_t)sgn & man) != (1ull << 63)); 806 } 807 808 static always_inline void vnumber_1(const GoString *src, long *p, 
JsonState *ret) { 809 int sgn = 1; 810 uint64_t man = 0; // mantissa for double (float64) 811 int man_nd = 0; // # digits of mantissa, 10 ^ 19 fits uint64_t 812 int exp10 = 0; // val = sgn * man * 10 ^ exp10 813 int trunc = 0; 814 double val = 0; 815 816 /* initial buffer pointers */ 817 long i = *p; 818 size_t n = src->len; 819 const char * s = src->buf; 820 char *dbuf = ret->dbuf; 821 ssize_t dcap = ret->dcap; 822 823 /* initialize the result, and check for EOF */ 824 init_ret(V_INTEGER) 825 check_eof() 826 check_sign(sgn = -1) 827 828 /* check for leading zero */ 829 check_digit() 830 check_leading_zero() 831 832 /* parse the integer part */ 833 while (i < n && is_digit(s[i])) { 834 add_integer_to_mantissa(man, man_nd, exp10, (s[i] - '0')) 835 i++; 836 } 837 838 if (exp10 > 0) { 839 trunc = 1; 840 } 841 842 /* check for decimal points */ 843 if (i < n && s[i] == '.') { 844 i++; 845 set_vt(V_DOUBLE) 846 check_eof() 847 check_digit() 848 } 849 850 /* skip the leading zeros of 0.000xxxx */ 851 if (man == 0 && exp10 == 0) { 852 while (i < n && s[i] == '0') { 853 i++; 854 exp10--; 855 } 856 man = 0; 857 man_nd = 0; 858 } 859 860 /* the fractional part (uint64_t mantissa can represent at most 19 digits) */ 861 while (i < n && man_nd < 19 && is_digit(s[i])) { 862 add_float_to_mantissa(man, man_nd, exp10, (s[i] - '0')) 863 i++; 864 } 865 866 /* skip the remaining digits */ 867 while (i < n && is_digit(s[i])) { 868 trunc = 1; 869 i++; 870 } 871 872 /* check for exponent */ 873 if (i < n && (s[i] == 'e' || s[i] == 'E')) { 874 int esm = 1; 875 int exp = 0; 876 877 /* check for the '+' or '-' sign, and parse the power */ 878 i++; 879 set_vt(V_DOUBLE) 880 check_eof() 881 parse_sign(esm) 882 check_digit() 883 while (i < n && is_digit(s[i])) { 884 if (exp < 10000) { 885 exp = exp * 10 + (s[i] - '0'); 886 } 887 i++; 888 } 889 exp10 += exp * esm; 890 goto parse_float; 891 } 892 893 if (ret->vt == V_INTEGER) { 894 if (!is_overflow(man, sgn, exp10)) { 895 ret->iv = (int64_t)man * 
sgn; 896 897 /* following lines equal to ret->dv = (double)(man) * sgn */ 898 ret->dv = (double)(man); 899 *(uint64_t *)&ret->dv |= ((uint64_t)(sgn) >> 63 << 63); 900 901 *p = i; 902 return; 903 } 904 set_vt(V_DOUBLE) 905 } 906 907 parse_float: 908 /* when fast algorithms failed, use slow fallback.*/ 909 if(!atof_fast(man, exp10, sgn, trunc, &val)) { 910 val = atof_native_1(s + *p, i - *p, dbuf, dcap); 911 } 912 913 /* check parsed double val */ 914 if (is_infinity(val)) { 915 ret->vt = -ERR_FLOAT_INF; 916 } 917 918 /* update the result */ 919 ret->dv = val; 920 *p = i; 921 } 922 923 /** Value Skipping FSM **/ 924 925 #define FSM_VAL 0 926 #define FSM_ARR 1 927 #define FSM_OBJ 2 928 #define FSM_KEY 3 929 #define FSM_ELEM 4 930 #define FSM_ARR_0 5 931 #define FSM_OBJ_0 6 932 933 #define FSM_DROP(v) (v)->sp-- 934 #define FSM_REPL(v, t) (v)->vt[(v)->sp - 1] = (t) 935 936 #define FSM_CHAR(c) do { if (ch != (c)) return -ERR_INVAL; } while (0) 937 #define FSM_XERR(v) do { long r = (v); if (r < 0) return r; } while (0) 938 939 static always_inline void fsm_init(StateMachine *self, int vt) { 940 self->sp = 1; 941 self->vt[0] = vt; 942 } 943 944 static always_inline long fsm_push(StateMachine *self, int vt) { 945 if (self->sp >= MAX_RECURSE) { 946 return -ERR_RECURSE_MAX; 947 } else { 948 self->vt[self->sp++] = vt; 949 return 0; 950 } 951 } 952 953 static always_inline long fsm_exec_1(StateMachine *self, const GoString *src, long *p, uint64_t flags) { 954 int vt; 955 char ch; 956 long vi = -1; 957 958 /* run until no more nested values */ 959 while (self->sp) { 960 ch = advance_ns(src, p); 961 if (ch == 0) { 962 return -ERR_EOF; 963 } 964 vt = self->vt[self->sp - 1]; 965 966 /* set the start address if any */ 967 if (vi == -1) { 968 vi = *p - 1; 969 } 970 971 /* check for special types */ 972 switch (vt) { 973 default: { 974 FSM_DROP(self); 975 break; 976 } 977 978 /* arrays */ 979 case FSM_ARR: { 980 switch (ch) { 981 case ']' : FSM_DROP(self); continue; 982 case ',' : 
FSM_XERR(fsm_push(self, FSM_VAL)); continue; 983 default : return -ERR_INVAL; 984 } 985 } 986 987 /* objects */ 988 case FSM_OBJ: { 989 switch (ch) { 990 case '}' : FSM_DROP(self); continue; 991 case ',' : FSM_XERR(fsm_push(self, FSM_KEY)); continue; 992 default : return -ERR_INVAL; 993 } 994 } 995 996 /* object keys */ 997 case FSM_KEY: { 998 FSM_CHAR('"'); 999 FSM_REPL(self, FSM_ELEM); 1000 FSM_XERR(skip_string_1(src, p, flags)); 1001 continue; 1002 } 1003 1004 /* object element */ 1005 case FSM_ELEM: { 1006 FSM_CHAR(':'); 1007 FSM_REPL(self, FSM_VAL); 1008 continue; 1009 } 1010 1011 /* arrays, first element */ 1012 case FSM_ARR_0: { 1013 if (ch == ']') { 1014 FSM_DROP(self); 1015 continue; 1016 } else { 1017 FSM_REPL(self, FSM_ARR); 1018 break; 1019 } 1020 } 1021 1022 /* objects, first pair */ 1023 case FSM_OBJ_0: { 1024 switch (ch) { 1025 default: { 1026 return -ERR_INVAL; 1027 } 1028 1029 /* empty object */ 1030 case '}': { 1031 FSM_DROP(self); 1032 continue; 1033 } 1034 1035 /* the quote of the first key */ 1036 case '"': { 1037 FSM_REPL(self, FSM_OBJ); 1038 FSM_XERR(skip_string_1(src, p, flags)); 1039 FSM_XERR(fsm_push(self, FSM_ELEM)); 1040 continue; 1041 } 1042 } 1043 } 1044 } 1045 1046 /* simple values */ 1047 switch (ch) { 1048 case '0' : /* fallthrough */ 1049 case '1' : /* fallthrough */ 1050 case '2' : /* fallthrough */ 1051 case '3' : /* fallthrough */ 1052 case '4' : /* fallthrough */ 1053 case '5' : /* fallthrough */ 1054 case '6' : /* fallthrough */ 1055 case '7' : /* fallthrough */ 1056 case '8' : /* fallthrough */ 1057 case '9' : FSM_XERR(skip_positive_1(src, p)); break; 1058 case '-' : FSM_XERR(skip_negative_1(src, p)); break; 1059 case 'n' : FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_NULL)); break; 1060 case 't' : FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_TRUE)); break; 1061 case 'f' : FSM_XERR(advance_dword(src, p, 0, *p - 1, VS_ALSE)); break; 1062 case '[' : FSM_XERR(fsm_push(self, FSM_ARR_0)); break; 1063 case '{' : 
FSM_XERR(fsm_push(self, FSM_OBJ_0)); break; 1064 case '"' : FSM_XERR(skip_string_1(src, p, flags)); break; 1065 case 0 : return -ERR_EOF; 1066 default : return -ERR_INVAL; 1067 } 1068 } 1069 1070 /* all done */ 1071 return vi; 1072 } 1073 1074 #undef FSM_DROP 1075 #undef FSM_REPL 1076 #undef FSM_CHAR 1077 #undef FSM_XERR 1078 1079 #define check_bits(mv) \ 1080 if (unlikely((v = mv & (mv - 1)) != 0)) { \ 1081 return -(sp - ss + __builtin_ctz(v) + 1); \ 1082 } 1083 1084 #define check_sidx(iv) \ 1085 if (likely(iv == -1)) { \ 1086 iv = sp - ss - 1; \ 1087 } else { \ 1088 return -(sp - ss); \ 1089 } 1090 1091 #define check_vidx(iv, mv) \ 1092 if (mv != 0) { \ 1093 if (likely(iv == -1)) { \ 1094 iv = sp - ss + __builtin_ctz(mv); \ 1095 } else { \ 1096 return -(sp - ss + __builtin_ctz(mv) + 1); \ 1097 } \ 1098 } 1099 1100 static always_inline long do_skip_number(const char *sp, size_t nb) { 1101 long di = -1; 1102 long ei = -1; 1103 long si = -1; 1104 const char * ss = sp; 1105 1106 /* check for EOF */ 1107 if (nb == 0) { 1108 return -1; 1109 } 1110 1111 /* special case of '0' */ 1112 if (*sp == '0' && (nb == 1 || (sp[1] != '.' 
&& sp[1] != 'e' && sp[1] != 'E'))) {
        return 1;
    }

#if USE_AVX2
    /* can do with AVX-2: classify 32 bytes per iteration */
    if (likely(nb >= 32)) {
        __m256i d9 = _mm256_set1_epi8('9');
        __m256i ds = _mm256_set1_epi8('/');
        __m256i dp = _mm256_set1_epi8('.');
        __m256i el = _mm256_set1_epi8('e');
        __m256i eu = _mm256_set1_epi8('E');
        __m256i xp = _mm256_set1_epi8('+');
        __m256i xm = _mm256_set1_epi8('-');

        /* 32-byte loop */
        do {
            __m256i sb = _mm256_loadu_si256 ((const void *)sp);
            __m256i i0 = _mm256_cmpgt_epi8 (sb, ds);    /* > '/', i.e. >= '0'            */
            __m256i i9 = _mm256_cmpgt_epi8 (sb, d9);    /* > '9'                         */
            __m256i id = _mm256_cmpeq_epi8 (sb, dp);    /* decimal point                 */
            __m256i il = _mm256_cmpeq_epi8 (sb, el);    /* 'e'                           */
            __m256i iu = _mm256_cmpeq_epi8 (sb, eu);    /* 'E'                           */
            __m256i ip = _mm256_cmpeq_epi8 (sb, xp);    /* '+'                           */
            __m256i im = _mm256_cmpeq_epi8 (sb, xm);    /* '-'                           */
            __m256i iv = _mm256_andnot_si256 (i9, i0);  /* digit: >= '0' and not > '9'   */
            __m256i ie = _mm256_or_si256 (il, iu);      /* exponent marker               */
            __m256i is = _mm256_or_si256 (ip, im);      /* sign                          */
            __m256i rt = _mm256_or_si256 (iv, id);
            __m256i ru = _mm256_or_si256 (ie, is);
            __m256i rv = _mm256_or_si256 (rt, ru);      /* any byte legal in a number    */

            /* decimal point, exponent and sign bitmasks */
            uint32_t md = _mm256_movemask_epi8(id);
            uint32_t me = _mm256_movemask_epi8(ie);
            uint32_t ms = _mm256_movemask_epi8(is);
            uint32_t mr = _mm256_movemask_epi8(rv);

            /* mismatch position: index of the first non-number byte, 32 if none
             * (v is scratch used by the check_bits / check_vidx macros) */
            uint32_t v;
            uint32_t i = __builtin_ctzll(~(uint64_t)mr | 0x0100000000);

            /* mask out excess characters past the mismatch */
            if (i != 32) {
                md &= (1 << i) - 1;
                me &= (1 << i) - 1;
                ms &= (1 << i) - 1;
            }

            /* check & update decimal point, exponent and sign index */
            check_bits(md)
            check_bits(me)
            check_bits(ms)
            check_vidx(di, md)
            check_vidx(ei, me)
            check_vidx(si, ms)

            /* stop at the first non-number byte */
            if (i != 32) {
                sp += i;
                _mm256_zeroupper();
                goto check_index;
            }

            /* move to next block */
            sp += 32;
            nb -= 32;
        } while (nb >= 32);

        /* clear the upper half to prevent AVX-SSE transition penalty */
        _mm256_zeroupper();
    }
#endif

    /* can do with SSE: classify 16 bytes per iteration */
    if (likely(nb >= 16)) {
        __m128i dc = _mm_set1_epi8(':');
        __m128i ds = _mm_set1_epi8('/');
        __m128i dp = _mm_set1_epi8('.');
        __m128i el = _mm_set1_epi8('e');
        __m128i eu = _mm_set1_epi8('E');
        __m128i xp = _mm_set1_epi8('+');
        __m128i xm = _mm_set1_epi8('-');

        /* 16-byte loop */
        do {
            __m128i sb = _mm_loadu_si128 ((const void *)sp);
            __m128i i0 = _mm_cmpgt_epi8 (sb, ds);   /* > '/', i.e. >= '0' */
            __m128i i9 = _mm_cmplt_epi8 (sb, dc);   /* < ':', i.e. <= '9' */
            __m128i id = _mm_cmpeq_epi8 (sb, dp);
            __m128i il = _mm_cmpeq_epi8 (sb, el);
            __m128i iu = _mm_cmpeq_epi8 (sb, eu);
            __m128i ip = _mm_cmpeq_epi8 (sb, xp);
            __m128i im = _mm_cmpeq_epi8 (sb, xm);
            __m128i iv = _mm_and_si128 (i9, i0);    /* digit */
            __m128i ie = _mm_or_si128 (il, iu);
            __m128i is = _mm_or_si128 (ip, im);
            __m128i rt = _mm_or_si128 (iv, id);
            __m128i ru = _mm_or_si128 (ie, is);
            __m128i rv = _mm_or_si128 (rt, ru);

            /* decimal point, exponent and sign bitmasks */
            uint32_t md = _mm_movemask_epi8(id);
            uint32_t me = _mm_movemask_epi8(ie);
            uint32_t ms = _mm_movemask_epi8(is);
            uint32_t mr = _mm_movemask_epi8(rv);

            /* mismatch position: index of the first non-number byte, 16 if none */
            uint32_t v;
            uint32_t i = __builtin_ctzll(~mr | 0x00010000);

            /* mask out excess characters past the mismatch */
            if (i != 16) {
                md &= (1 << i) - 1;
                me &= (1 << i) - 1;
                ms &= (1 << i) - 1;
            }

            /* check & update decimal point, exponent and sign index */
            check_bits(md)
            check_bits(me)
            check_bits(ms)
            check_vidx(di, md)
            check_vidx(ei, me)
            check_vidx(si, ms)

            /* stop at the first non-number byte */
            if (i != 16) {
                sp += i;
                goto check_index;
            }

            /* move to next block */
            sp += 16;
            nb -= 16;
        } while (nb >= 16);
    }

    /* remaining bytes, do with scalar code */
    while (likely(nb-- > 0)) {
        switch (*sp++) {
            case '0' : /* fallthrough */
            case '1' : /* fallthrough */
            case '2' : /* fallthrough */
            case '3' : /* fallthrough */
            case '4' : /* fallthrough */
            case '5' : /* fallthrough */
            case '6' : /* fallthrough */
            case '7' : /* fallthrough */
            case '8' : /* fallthrough */
            case '9' : break;
            case '.' : check_sidx(di); break;
            case 'e' : /* fallthrough */
            case 'E' : check_sidx(ei); break;
            case '+' : /* fallthrough */
            case '-' : check_sidx(si); break;
            default : sp--; goto check_index;
        }
    }
check_index:
    /* validate the relative order of '.', 'e'/'E' and the sign; a negative
     * return value encodes the offset of the offending byte */
    if (di == 0 || si == 0 || ei == 0) {
        return -1;
    } else if (di == sp - ss - 1 || si == sp - ss - 1 || ei == sp - ss - 1) {
        return -(sp - ss);
    } else if (si > 0 && ei != si - 1) {
        return -si - 1;
    } else if (di >= 0 && ei >= 0 && di > ei - 1) {
        return -di - 1;
    } else if (di >= 0 && ei >= 0 && di == ei - 1) {
        return -ei - 1;
    } else {
        return sp - ss;
    }
}

#undef check_bits
#undef check_sidx
#undef check_vidx

/* skip_string_1 skips the JSON string whose opening quote precedes *p,
 * advancing *p past the closing quote. Returns the index of the opening
 * quote on success, or a negative error from advance_string on failure. */
static always_inline long skip_string_1(const GoString *src, long *p, uint64_t flags) {
    int64_t v = -1;
    ssize_t q = *p - 1; // start position
    ssize_t e = advance_string(src, *p, &v, flags);

    /* check for errors: on EOF point at the end of input, otherwise at the
     * position recorded by advance_string */
    if (e < 0) {
        *p = e == -ERR_EOF ?
src->len : v;
        return e;
    }

    /* update the position */
    *p = e;
    return q;
}

/* skip_negative_1 skips a number whose leading '-' has already been consumed
 * (*p points just past the '-'). On success *p is moved past the number and
 * the index of the '-' is returned; on failure *p is left at the offending
 * byte and -ERR_INVAL is returned. */
static always_inline long skip_negative_1(const GoString *src, long *p) {
    long i = *p;
    long r = do_skip_number(src->buf + i, src->len - i);

    /* check for errors: a negative r encodes the error offset */
    if (r < 0) {
        *p -= r + 1;
        return -ERR_INVAL;
    }

    /* update value pointer */
    *p += r;
    return i - 1;
}

/* skip_positive_1 skips a number whose first digit has already been consumed
 * (*p points just past it). On success *p is moved past the number and the
 * index of that first digit is returned; on failure *p is left at the
 * offending byte and -ERR_INVAL is returned. */
static always_inline long skip_positive_1(const GoString *src, long *p) {
    long i = *p - 1;    // rewind to include the already-consumed digit
    long r = do_skip_number(src->buf + i, src->len - i);

    /* check for errors */
    if (r < 0) {
        *p -= r + 2;
        return -ERR_INVAL;
    }

    /* update value pointer */
    *p += r - 1;
    return i;
}

/* skip_number_1 validates and skips a complete JSON number starting at *p.
 * Returns the start index on success; -ERR_EOF or -ERR_INVAL otherwise,
 * leaving *p at the error position. */
static always_inline long skip_number_1(const GoString *src, long *p) {
    const char* ss = src->buf;
    const char* sp = src->buf + *p;
    size_t nb = src->len - *p;
    long i = *p;
    long r;
    bool neg = *sp == '-';

    /* skip an optional leading minus sign */
    sp += neg;
    nb -= neg;
    if (unlikely(nb <= 0)) {    // nb is unsigned: this only catches nb == 0
        *p = sp - ss;
        return -ERR_EOF;
    }

    /* the byte after the sign must be a digit */
    if (unlikely(nb > 0 && (*sp > '9' || *sp < '0'))) {
        *p = sp - ss;
        return -ERR_INVAL;
    }

    r = do_skip_number(sp, nb);
    if (unlikely(r < 0)) {
        *p = sp - (r + 1) - ss;
        return -ERR_INVAL;
    }
    *p = sp + r - ss;
    return i;
}

/* skip_one_1 skips one JSON value of any type via the state machine. */
static always_inline long skip_one_1(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
    fsm_init(m, FSM_VAL);
    return fsm_exec_1(m, src, p, flags);
}

/* get_maskx64 returns a 64-bit mask with bit i set iff s[i] == c.
 * NOTE(review): always reads a full 64 bytes from s — callers must ensure
 * the load stays within addressable memory. */
static always_inline uint64_t get_maskx64(const char *s, char c) {
#if USE_AVX2
    __m256i v0 = _mm256_loadu_si256((__m256i const *)s);
    __m256i v1 = _mm256_loadu_si256((__m256i const *)(s + 32));
    uint32_t m0 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v0, _mm256_set1_epi8(c)));
    uint32_t m1 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v1,
_mm256_set1_epi8(c)));
    return ((uint64_t)(m1) << 32) | (uint64_t)(m0);
#else
    __m128i v0 = _mm_loadu_si128((__m128i const*)s);
    __m128i v1 = _mm_loadu_si128((__m128i const*)(s + 16));
    __m128i v2 = _mm_loadu_si128((__m128i const*)(s + 32));
    __m128i v3 = _mm_loadu_si128((__m128i const*)(s + 48));
    uint32_t m0 = _mm_movemask_epi8(_mm_cmpeq_epi8(v0, _mm_set1_epi8(c)));
    uint32_t m1 = _mm_movemask_epi8(_mm_cmpeq_epi8(v1, _mm_set1_epi8(c)));
    uint32_t m2 = _mm_movemask_epi8(_mm_cmpeq_epi8(v2, _mm_set1_epi8(c)));
    uint32_t m3 = _mm_movemask_epi8(_mm_cmpeq_epi8(v3, _mm_set1_epi8(c)));
    return ((uint64_t)(m3) << 48) | ((uint64_t)(m2) << 32) | ((uint64_t)(m1) << 16) | (uint64_t)(m0);
#endif
}

/* get_maskx32 returns a 32-bit mask (widened to uint64_t) with bit i set iff
 * s[i] == c; always reads a full 32 bytes from s. */
static always_inline uint64_t get_maskx32(const char *s, char c) {
#if USE_AVX2
    __m256i v0 = _mm256_loadu_si256((__m256i const *)s);
    uint64_t m0 = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v0, _mm256_set1_epi8(c)));
    return m0;
#else
    __m128i v0 = _mm_loadu_si128((__m128i const*)s);
    __m128i v1 = _mm_loadu_si128((__m128i const*)(s + 16));
    uint64_t m0 = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(v0, _mm_set1_epi8(c)));
    uint64_t m1 = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, _mm_set1_epi8(c)));
    return m0 | (m1 << 16);
#endif
}

// get_string_maskx64 computes, for one 64-byte chunk, the bitmask of bytes
// lying inside a JSON string (unescaped quotes toggle the state).
// *prev_inquote and *prev_bs carry the in-string / trailing-backslash state
// across chunks.
static always_inline uint64_t get_string_maskx64(const char *s, uint64_t *prev_inquote, uint64_t *prev_bs) {
    uint64_t escaped = *prev_bs;
    uint64_t quote_mask = 0, bs_mask = 0;

    /* read and get the quote or backslash bitmask */
    quote_mask = get_maskx64(s, '"');
    bs_mask = get_maskx64(s, '\\');

    /* get the escaped bitmask: odd/even backslash-run trick to find bytes
     * preceded by an odd number of backslashes */
    if (bs_mask || *prev_bs) {
        bs_mask &= ~(*prev_bs);
        uint64_t follow_bs = (bs_mask << 1) | *prev_bs;
        uint64_t bs_start = bs_mask & ~follow_bs;
        uint64_t odd_start = bs_start & ODD_MASK;
        uint64_t even_or_oc = add64(odd_start, bs_mask, prev_bs);
        uint64_t even_or_escaped = (even_or_oc << 1) ^ EVEN_MASK;
        escaped = follow_bs & even_or_escaped;
    } else {
        *prev_bs = 0;
    }
    /* drop the quotes that are escaped */
    quote_mask &= ~escaped;

    /* get the in-quote bitmask: a carry-less multiply by all-ones computes
     * the prefix XOR of the quote mask, i.e. the spans between quotes */
    uint64_t inquote = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0, quote_mask), _mm_set1_epi8('\xFF'), 0));
    inquote ^= *prev_inquote;
    /* sign-extend the top bit so the state carries into the next chunk */
    *prev_inquote = (uint64_t)(((int64_t)(inquote)) >> 63);
    return inquote;
}

// get the bitmask of the next JSON structural characters: '}', ']' or ','.
#if USE_AVX2
static always_inline int get_structural_maskx32(const char *s) {
    __m256i v = _mm256_loadu_si256((const void *)s);
    __m256i e1 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('}'));
    __m256i e2 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8(']'));
    __m256i e3 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8(','));
    __m256i sv = _mm256_or_si256(_mm256_or_si256(e1, e2), e3);
    return _mm256_movemask_epi8(sv);
}
#endif

/* 16-byte SSE variant of get_structural_maskx32. */
static always_inline int get_structural_maskx16(const char *s) {
    __m128i v = _mm_loadu_si128((const void *)s);
    __m128i e1 = _mm_cmpeq_epi8(v, _mm_set1_epi8('}'));
    __m128i e2 = _mm_cmpeq_epi8(v, _mm_set1_epi8(']'));
    __m128i e3 = _mm_cmpeq_epi8(v, _mm_set1_epi8(','));
    __m128i sv = _mm_or_si128(_mm_or_si128(e1, e2), e3);
    return _mm_movemask_epi8(sv);
}

// skip the number until the next '}', ']' or ',' or the end of the json.
/* skip_number_fast advances *p to the next structural character ('}', ']'
 * or ',') or to the end of input, without validating the number itself.
 * Returns the index just before the start position. */
static always_inline long skip_number_fast(const GoString *src, long *p) {
    size_t nb = src->len - *p;
    const char *s = src->buf + *p;
    long vi = *p - 1;
    int m = 0;

#if USE_AVX2
    while (likely(nb >= 32)) {
        if ((m = get_structural_maskx32(s))) {
            *p = s - src->buf + __builtin_ctzll(m);
            return vi;
        }
        s += 32, nb -= 32;
    }
#endif

    while (likely(nb >= 16)) {
        if ((m = get_structural_maskx16(s))) {
            *p = s - src->buf + __builtin_ctzll(m);
            return vi;
        }
        s += 16, nb -= 16;
    }

    /* scalar tail */
    while (likely(nb > 0)) {
        if (*s == '}' || *s == ']' || *s == ',') {
            *p = s - src->buf;
            return vi;
        }
        s++, nb--;
    }
    *p = s - src->buf;
    return vi;
}

/* skip_container_fast skips a (possibly nested) container delimited by
 * lc/rc ('{'/'}' or '['/']'), ignoring braces inside strings. On success
 * *p is placed just past the matching closing brace and the index just
 * before the start position is returned; -ERR_EOF otherwise. */
static always_inline long skip_container_fast(const GoString *src, long *p, char lc, char rc) {
    long nb = src->len - *p;
    const char *s = src->buf + *p;
    long vi = *p - 1;

    uint64_t prev_inquote = 0, prev_bs = 0;
    uint64_t lbrace = 0, rbrace = 0;
    size_t lnum = 0, rnum = 0, last_lnum = 0;
    uint64_t inquote = 0;

    /* 64-byte loop; the `skip` label is re-entered once for the padded tail */
    while (likely(nb >= 64)) {
    skip:
        inquote = get_string_maskx64(s, &prev_inquote, &prev_bs);
        lbrace = get_maskx64(s, lc) & ~inquote;
        rbrace = get_maskx64(s, rc) & ~inquote;

        /* traverse each right brace */
        last_lnum = lnum;
        while (rbrace > 0) {
            /* count the left braces before the lowest remaining right brace */
            uint64_t lbrace_first = (rbrace - 1) & lbrace;
            lnum = last_lnum + __builtin_popcountll((int64_t)lbrace_first);
            bool is_closed = lnum <= rnum;
            if (is_closed) {
                *p = src->len - nb + __builtin_ctzll(rbrace) + 1;
                // when scanning the zero-padded tail buffer the hit may lie
                // past the real input: clamp and report EOF
                if (*p > src->len) {
                    *p = src->len;
                    return -ERR_EOF;
                }
                return vi;
            }
            rbrace &= (rbrace - 1); // clear the lowest right brace
            rnum ++;
        }
        lnum = last_lnum + __builtin_popcountll((int64_t)lbrace);
        s += 64, nb -= 64;
    }

    if (nb <= 0) {
        *p = src->len;
        return -ERR_EOF;
    }

    /* tail: if a 64-byte load would cross a page boundary, copy the rest into
     * a zero-padded buffer, then run the vector loop body one more time */
    char tbuf[64] = {0};
    bool cross_page = vec_cross_page(s, 64);
    if (cross_page) {
        memcpy_p64(tbuf, s, nb);
        s = tbuf;
    }
    goto skip;
}

/* skip_object_fast skips a JSON object without validating its contents. */
static always_inline long skip_object_fast(const GoString *src, long *p) {
    return skip_container_fast(src, p, '{', '}');
}

/* skip_array_fast skips a JSON array without validating its contents. */
static always_inline long skip_array_fast(const GoString *src, long *p) {
    return skip_container_fast(src, p, '[', ']');
}

/* skip_string_fast skips a JSON string without unescaping, honoring
 * backslash escapes. Returns the index of the opening quote on success,
 * -ERR_EOF otherwise. */
static always_inline long skip_string_fast(const GoString *src, long *p) {
    const char* s = src->buf + *p;
    long nb = src->len - *p;
    long vi = *p - 1;
    uint64_t prev_bs = 0, escaped;

    while (likely(nb >= 32)) {
        uint32_t quote = get_maskx32(s, '"');
        uint32_t bs_mask = get_maskx32(s, '\\');
        if (bs_mask || prev_bs) {
            /* odd/even backslash-run trick to clear escaped quotes */
            bs_mask &= ~prev_bs;
            uint64_t follow_bs = (bs_mask << 1) | prev_bs;
            uint64_t bs_start = bs_mask & ~follow_bs;
            uint64_t odd_start = bs_start & ODD_MASK;
            uint64_t even_or_oc = add32(odd_start, bs_mask, &prev_bs);
            uint64_t even_or_escaped = (even_or_oc << 1) ^ EVEN_MASK;
            escaped = follow_bs & even_or_escaped;
            quote &= ~escaped;
        }
        if (quote) {
            *p = s + __builtin_ctzll(quote) + 1 - src->buf;
            return vi;
        }
        s += 32;
        nb -= 32;
    }

    /* a backslash carried over from the last chunk escapes the next byte */
    if (unlikely(prev_bs != 0)) {
        if (nb == 0) return -ERR_EOF;
        s++, nb--;
    }

    /* scalar tail */
    while (likely(nb > 0)) {
        if (*s == '\\') {
            s += 2, nb -= 2;
            continue;
        }
        if (*s == '"') {
            *p = s - src->buf + 1;
            return vi;
        }
        s++, nb--;
    }
    return -ERR_EOF;
}

/* skip_one_fast_1 skips a single JSON value of any type without full
 * validation, returning the index of its first non-space byte. */
static always_inline long skip_one_fast_1(const GoString *src, long *p) {
    char c = advance_ns(src, p);
    /* set the start address */
    long vi = *p - 1;
    switch (c) {
        case '[': return skip_array_fast(src, p);
        case '{': return skip_object_fast(src, p);
        case '"': return skip_string_fast(src, p);
        case '-': case '0' ... '9': return skip_number_fast(src, p);
        /* "true" / "null": 3 more bytes after the first character */
        case 't': case 'n': { if (*p + 3 <= src->len) { *p += 3; } else { return -ERR_EOF; } }; break;
        /* "false": 4 more bytes after the 'f' */
        case 'f': { if (*p + 4 <= src->len) { *p += 4; } else { return -ERR_EOF; } }; break;
        case 0 : return -ERR_EOF;
        default : *p -= 1; return -ERR_INVAL; // backward error position
    }
    return vi;
}


/* kind extracts the Go kind bits from an interface's type descriptor. */
static always_inline GoKind kind(const GoIface* iface) {
    return (iface->type->kind_flags) & GO_KIND_MASK;
}

/* is_int reports whether the interface holds a Go int. */
static always_inline bool is_int(const GoIface* iface) {
    return iface->type != NULL && kind(iface) == Int;
}

/* is_str reports whether the interface holds a Go string. */
static always_inline bool is_str(const GoIface* iface) {
    return iface->type != NULL && kind(iface) == String;
}

/* get_str returns the GoString payload; caller must have checked is_str. */
static always_inline GoString get_str(const GoIface* iface) {
    return *(GoString*)(iface->value);
}

/* get_int returns the int64 payload; caller must have checked is_int. */
static always_inline int64_t get_int(const GoIface* iface) {
    return *(int64_t*)(iface->value);
}

// xmemcmpeq returns true if s1 and s2 are equal for the n bytes, otherwise returns false.
1637 static always_inline bool xmemcmpeq(const char * s1, const char * s2, size_t n) { 1638 bool c1, c2; 1639 #if USE_AVX2 1640 while (n >= 32) { 1641 __m256i v1 = _mm256_loadu_si256((const void *)s1); 1642 __m256i v2 = _mm256_loadu_si256((const void *)s2); 1643 uint32_t mask = ~((uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2))); 1644 if (mask) return false; 1645 s1 += 32; 1646 s2 += 32; 1647 n -= 32; 1648 }; 1649 c1 = vec_cross_page(s1, 32); 1650 c2 = vec_cross_page(s2, 32); 1651 // not cross page 1652 if (!c1 && !c2) { 1653 __m256i v1 = _mm256_loadu_si256((const void *)s1); 1654 __m256i v2 = _mm256_loadu_si256((const void *)s2); 1655 uint32_t mask = ~((uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2))); 1656 bool eq = (mask == 0) || (__builtin_ctzll(mask) >= n); 1657 return eq; 1658 } 1659 #endif 1660 while (n >= 16) { 1661 __m128i v1 = _mm_loadu_si128((const void *)s1); 1662 __m128i v2 = _mm_loadu_si128((const void *)s2); 1663 uint16_t mask = ~((uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2))); 1664 if (mask != 0) return false; 1665 s1 += 16; 1666 s2 += 16; 1667 n -= 16; 1668 }; 1669 c1 = vec_cross_page(s1, 16); 1670 c2 = vec_cross_page(s2, 16); 1671 // not cross page 1672 if (!c1 && !c2) { 1673 __m128i v1 = _mm_loadu_si128((const void *)s1); 1674 __m128i v2 = _mm_loadu_si128((const void *)s2); 1675 uint16_t mask = ~((uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2))); 1676 bool eq = (mask == 0) || (__builtin_ctzll(mask) >= n); 1677 return eq; 1678 } 1679 // cross page 1680 while (n > 0 && *s1++ == *s2++) n--; 1681 return n == 0; 1682 } 1683 1684 // match_key return negative if errors, zero if not matched, one if matched. 
1685 static always_inline long match_key(const GoString *src, long *p, const GoString key) { 1686 static const long not_match = 0; 1687 int64_t v = -1; 1688 long si = *p; 1689 long se = advance_string_default(src, *p, &v); 1690 if (unlikely(se < 0)) { 1691 *p = src->len; 1692 return -ERR_EOF; 1693 } 1694 1695 /* update position */ 1696 *p = se; 1697 1698 /* compare non-escaped strings */ 1699 if (likely(v == -1 || v > se)) { 1700 long sn = se - si - 1; 1701 1702 // check empty keys 1703 if (!sn && !key.len) { 1704 return true; 1705 } 1706 1707 return sn == key.len && xmemcmpeq(src->buf + si, key.buf, key.len); 1708 } 1709 1710 /* deal with escaped strings */ 1711 char buf[8] = {0}; // escaped buffer 1712 const char* sp = src->buf + si; 1713 const char* end = src->buf + se - 1; 1714 const char* kp = key.buf; 1715 const char* ke = key.buf + key.len; 1716 while (sp < end && kp < ke) { 1717 if (*sp == '\\') { 1718 long en = unescape(&sp, end, buf); 1719 if (en < 0) { 1720 *p = sp - src->buf; 1721 return en; 1722 } 1723 const char* ee = buf + en; 1724 const char* ep = buf; 1725 while (kp < ke && ep < ee && *kp == *ep) kp++, ep++; 1726 if (ep != ee) { 1727 return not_match; 1728 } 1729 } else if (*sp == *kp) { 1730 sp++, kp++; 1731 } else { 1732 return not_match; 1733 } 1734 }; 1735 return sp == end && kp == ke; 1736 }