github.com/cloudwego/dynamicgo@v0.2.6-0.20240519101509-707f41b6b834/native/scanning.c (about) 1 /* 2 * Copyright 2023 CloudWeGo Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "native.h" 18 #include "scanning.h" 19 20 static const char *CS_ARRAY = "[]{},\"[]{},\"[]{}"; 21 static const char *CS_OBJECT = "[]{},:\"[]{}:,\"[]"; 22 23 static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa; 24 static const uint64_t EVEN_MASK = 0x5555555555555555; 25 26 static const double P10_TAB[23] = { 27 /* <= the connvertion to double is not exact when less than 1 => */ 1e-000, 28 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 29 1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 30 1e+021, 1e+022 /* <= the connvertion to double is not exact when larger, => */ 31 }; 32 33 static inline uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo) 34 { 35 uint32_t v; 36 uint32_t c = __builtin_uadd_overflow((uint32_t)v1, (uint32_t)v2, &v); 37 38 /* set the carry */ 39 *vo = c; 40 return v; 41 } 42 43 static inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) 44 { 45 uint64_t v; 46 uint64_t c = __builtin_uaddll_overflow(v1, v2, &v); 47 48 /* set the carry */ 49 *vo = c; 50 return v; 51 } 52 53 static inline char isspace(char ch) 54 { 55 return ch == ' ' || ch == '\r' || ch == '\n' | ch == '\t'; 56 } 57 58 static inline void vdigits(const GoString *src, long *p, JsonState *ret) 59 { 
60 --*p; 61 vnumber(src, p, ret); 62 } 63 64 char advance_ns(const GoString *src, long *p) 65 { 66 size_t vi = *p; 67 size_t nb = src->len; 68 const char *sp = src->buf; 69 70 /* it's likely to run into non-spaces within a few 71 * characters, so test up to 4 characters manually */ 72 if (vi < nb && !isspace(sp[vi])) 73 goto nospace; 74 else 75 vi++; 76 if (vi < nb && !isspace(sp[vi])) 77 goto nospace; 78 else 79 vi++; 80 if (vi < nb && !isspace(sp[vi])) 81 goto nospace; 82 else 83 vi++; 84 if (vi < nb && !isspace(sp[vi])) 85 goto nospace; 86 else 87 vi++; 88 89 /* check EOF */ 90 if (vi >= nb) 91 { 92 *p = vi; 93 return 0; 94 } 95 96 /* too many spaces, use SIMD to search for characters */ 97 if ((vi = lspace(sp, nb, vi)) >= nb) 98 { 99 return 0; 100 } 101 102 nospace: 103 *p = vi + 1; 104 return src->buf[vi]; 105 } 106 107 int64_t advance_dword(const GoString *src, long *p, long dec, int64_t ret, uint32_t val) 108 { 109 if (*p > src->len + dec - 4) 110 { 111 *p = src->len; 112 return -ERR_EOF; 113 } 114 else if (*(uint32_t *)(src->buf + *p - dec) == val) 115 { 116 *p += 4 - dec; 117 return ret; 118 } 119 else 120 { 121 *p -= dec; 122 for (int i = 0; src->buf[*p] == (val & 0xff); i++, ++*p) 123 { 124 val >>= 8; 125 } 126 return -ERR_INVAL; 127 } 128 } 129 130 static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep) 131 { 132 char ch; 133 uint64_t es; 134 uint64_t fe; 135 uint64_t os; 136 uint64_t m0; 137 uint64_t m1; 138 uint64_t cr = 0; 139 140 /* prevent out-of-bounds accessing */ 141 if (unlikely(src->len == p)) 142 { 143 return -ERR_EOF; 144 } 145 146 /* buffer pointers */ 147 size_t nb = src->len; 148 const char *sp = src->buf; 149 const char *ss = src->buf; 150 151 #define ep_init() *ep = -1; 152 #define ep_setc() ep_setx(sp - ss - 1) 153 #define ep_setx(x) \ 154 if (*ep == -1) \ 155 { \ 156 *ep = (x); \ 157 } 158 159 /* seek to `p` */ 160 nb -= p; 161 sp += p; 162 ep_init() 163 164 #if USE_AVX2 165 /* initialize vectors */ 166 __m256i 
v0; 167 __m256i v1; 168 __m256i q0; 169 __m256i q1; 170 __m256i x0; 171 __m256i x1; 172 __m256i cq = _mm256_set1_epi8('"'); 173 __m256i cx = _mm256_set1_epi8('\\'); 174 175 /* partial masks */ 176 uint32_t s0; 177 uint32_t s1; 178 uint32_t t0; 179 uint32_t t1; 180 #else 181 /* initialize vectors */ 182 __m128i v0; 183 __m128i v1; 184 __m128i v2; 185 __m128i v3; 186 __m128i q0; 187 __m128i q1; 188 __m128i q2; 189 __m128i q3; 190 __m128i x0; 191 __m128i x1; 192 __m128i x2; 193 __m128i x3; 194 __m128i cq = _mm_set1_epi8('"'); 195 __m128i cx = _mm_set1_epi8('\\'); 196 197 /* partial masks */ 198 uint32_t s0; 199 uint32_t s1; 200 uint32_t s2; 201 uint32_t s3; 202 uint32_t t0; 203 uint32_t t1; 204 uint32_t t2; 205 uint32_t t3; 206 #endif 207 208 #define m0_mask(add) \ 209 m1 &= ~cr; \ 210 fe = (m1 << 1) | cr; \ 211 os = (m1 & ~fe) & ODD_MASK; \ 212 es = add(os, m1, &cr) << 1; \ 213 m0 &= ~(fe & (es ^ EVEN_MASK)); 214 215 /* 64-byte SIMD loop */ 216 while (likely(nb >= 64)) 217 { 218 #if USE_AVX2 219 v0 = _mm256_loadu_si256((const void *)(sp + 0)); 220 v1 = _mm256_loadu_si256((const void *)(sp + 32)); 221 q0 = _mm256_cmpeq_epi8(v0, cq); 222 q1 = _mm256_cmpeq_epi8(v1, cq); 223 x0 = _mm256_cmpeq_epi8(v0, cx); 224 x1 = _mm256_cmpeq_epi8(v1, cx); 225 s0 = _mm256_movemask_epi8(q0); 226 s1 = _mm256_movemask_epi8(q1); 227 t0 = _mm256_movemask_epi8(x0); 228 t1 = _mm256_movemask_epi8(x1); 229 m0 = ((uint64_t)s1 << 32) | (uint64_t)s0; 230 m1 = ((uint64_t)t1 << 32) | (uint64_t)t0; 231 #else 232 v0 = _mm_loadu_si128((const void *)(sp + 0)); 233 v1 = _mm_loadu_si128((const void *)(sp + 16)); 234 v2 = _mm_loadu_si128((const void *)(sp + 32)); 235 v3 = _mm_loadu_si128((const void *)(sp + 48)); 236 q0 = _mm_cmpeq_epi8(v0, cq); 237 q1 = _mm_cmpeq_epi8(v1, cq); 238 q2 = _mm_cmpeq_epi8(v2, cq); 239 q3 = _mm_cmpeq_epi8(v3, cq); 240 x0 = _mm_cmpeq_epi8(v0, cx); 241 x1 = _mm_cmpeq_epi8(v1, cx); 242 x2 = _mm_cmpeq_epi8(v2, cx); 243 x3 = _mm_cmpeq_epi8(v3, cx); 244 s0 = _mm_movemask_epi8(q0); 
245 s1 = _mm_movemask_epi8(q1); 246 s2 = _mm_movemask_epi8(q2); 247 s3 = _mm_movemask_epi8(q3); 248 t0 = _mm_movemask_epi8(x0); 249 t1 = _mm_movemask_epi8(x1); 250 t2 = _mm_movemask_epi8(x2); 251 t3 = _mm_movemask_epi8(x3); 252 m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0; 253 m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0; 254 #endif 255 256 /** update first quote position */ 257 if (unlikely(m1 != 0)) 258 { 259 ep_setx(sp - ss + __builtin_ctzll(m1)) 260 } 261 262 /** mask all the escaped quotes */ 263 if (unlikely(m1 != 0 || cr != 0)) 264 { 265 m0_mask(add64) 266 } 267 268 /* check for end quote */ 269 if (m0 != 0) 270 { 271 return sp - ss + __builtin_ctzll(m0) + 1; 272 } 273 274 /* move to the next block */ 275 sp += 64; 276 nb -= 64; 277 } 278 279 /* 32-byte SIMD round */ 280 if (likely(nb >= 32)) 281 { 282 #if USE_AVX2 283 v0 = _mm256_loadu_si256((const void *)sp); 284 q0 = _mm256_cmpeq_epi8(v0, cq); 285 x0 = _mm256_cmpeq_epi8(v0, cx); 286 s0 = _mm256_movemask_epi8(q0); 287 t0 = _mm256_movemask_epi8(x0); 288 m0 = (uint64_t)s0; 289 m1 = (uint64_t)t0; 290 #else 291 v0 = _mm_loadu_si128((const void *)(sp + 0)); 292 v1 = _mm_loadu_si128((const void *)(sp + 16)); 293 q0 = _mm_cmpeq_epi8(v0, cq); 294 q1 = _mm_cmpeq_epi8(v1, cq); 295 x0 = _mm_cmpeq_epi8(v0, cx); 296 x1 = _mm_cmpeq_epi8(v1, cx); 297 s0 = _mm_movemask_epi8(q0); 298 s1 = _mm_movemask_epi8(q1); 299 t0 = _mm_movemask_epi8(x0); 300 t1 = _mm_movemask_epi8(x1); 301 m0 = ((uint64_t)s1 << 16) | (uint64_t)s0; 302 m1 = ((uint64_t)t1 << 16) | (uint64_t)t0; 303 #endif 304 305 /** update first quote position */ 306 if (unlikely(m1 != 0)) 307 { 308 ep_setx(sp - ss + __builtin_ctzll(m1)) 309 } 310 311 /** mask all the escaped quotes */ 312 if (unlikely(m1 != 0 || cr != 0)) 313 { 314 m0_mask(add32) 315 } 316 317 /* check for end quote */ 318 if (m0 != 0) 319 { 320 return sp - ss + __builtin_ctzll(m0) + 1; 321 } 322 323 /* move to the 
next block */ 324 sp += 32; 325 nb -= 32; 326 } 327 328 /* check for carry */ 329 if (unlikely(cr != 0)) 330 { 331 if (nb == 0) 332 { 333 return -ERR_EOF; 334 } 335 else 336 { 337 ep_setc() 338 sp++, 339 nb--; 340 } 341 } 342 343 /* handle the remaining bytes with scalar code */ 344 while (nb-- > 0 && (ch = *sp++) != '"') 345 { 346 if (unlikely(ch == '\\')) 347 { 348 if (nb == 0) 349 { 350 return -ERR_EOF; 351 } 352 else 353 { 354 ep_setc() 355 sp++, 356 nb--; 357 } 358 } 359 } 360 361 #undef ep_init 362 #undef ep_setc 363 #undef ep_setx 364 #undef m0_mask 365 366 /* check for quotes */ 367 if (ch == '"') 368 { 369 return sp - ss; 370 } 371 else 372 { 373 return -ERR_EOF; 374 } 375 } 376 377 static inline int _mm_get_mask(__m128i v, __m128i t) 378 { 379 return _mm_movemask_epi8(_mm_cmpeq_epi8(v, t)); 380 } 381 382 // contrl char: 0x00 ~ 0x1F 383 static inline int _mm_cchars_mask(__m128i v) 384 { 385 __m128i e1 = _mm_cmpgt_epi8(v, _mm_set1_epi8(-1)); 386 __m128i e2 = _mm_cmpgt_epi8(v, _mm_set1_epi8(31)); 387 return _mm_movemask_epi8(_mm_andnot_si128(e2, e1)); 388 } 389 390 #if USE_AVX2 391 392 static inline int _mm256_get_mask(__m256i v, __m256i t) 393 { 394 return _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, t)); 395 } 396 397 // contrl char: 0x00 ~ 0x1F 398 static inline int _mm256_cchars_mask(__m256i v) 399 { 400 __m256i e1 = _mm256_cmpgt_epi8(v, _mm256_set1_epi8(-1)); 401 __m256i e2 = _mm256_cmpgt_epi8(v, _mm256_set1_epi8(31)); 402 return _mm256_movemask_epi8(_mm256_andnot_si256(e2, e1)); 403 } 404 405 #endif 406 407 static inline ssize_t advance_validate_string(const GoString *src, long p, int64_t *ep) 408 { 409 char ch; 410 uint64_t es; 411 uint64_t fe; 412 uint64_t os; 413 uint64_t m0; 414 uint64_t m1; 415 uint64_t m2; 416 uint64_t cr = 0; 417 long qp = 0; 418 long np = 0; 419 420 /* prevent out-of-bounds accessing */ 421 if (unlikely(src->len == p)) 422 { 423 return -ERR_EOF; 424 } 425 426 /* buffer pointers */ 427 size_t nb = src->len; 428 const char *sp = 
src->buf; 429 const char *ss = src->buf; 430 431 #define ep_init() *ep = -1; 432 #define ep_setc() ep_setx(sp - ss - 1) 433 #define ep_setx(x) \ 434 if (*ep == -1) \ 435 { \ 436 *ep = (x); \ 437 } 438 439 /* seek to `p` */ 440 nb -= p; 441 sp += p; 442 ep_init() 443 444 #if USE_AVX2 445 /* initialize vectors */ 446 __m256i v0; 447 __m256i v1; 448 __m256i cq = _mm256_set1_epi8('"'); 449 __m256i cx = _mm256_set1_epi8('\\'); 450 451 /* partial masks */ 452 uint32_t s0, s1; 453 uint32_t t0, t1; 454 uint32_t c0, c1; 455 #else 456 /* initialize vectors */ 457 __m128i v0; 458 __m128i v1; 459 __m128i v2; 460 __m128i v3; 461 __m128i cq = _mm_set1_epi8('"'); 462 __m128i cx = _mm_set1_epi8('\\'); 463 464 /* partial masks */ 465 uint32_t s0, s1, s2, s3; 466 uint32_t t0, t1, t2, t3; 467 uint32_t c0, c1, c2, c3; 468 #endif 469 470 #define m0_mask(add) \ 471 m1 &= ~cr; \ 472 fe = (m1 << 1) | cr; \ 473 os = (m1 & ~fe) & ODD_MASK; \ 474 es = add(os, m1, &cr) << 1; \ 475 m0 &= ~(fe & (es ^ EVEN_MASK)); 476 477 /* 64-byte SIMD loop */ 478 while (likely(nb >= 64)) 479 { 480 #if USE_AVX2 481 v0 = _mm256_loadu_si256((const void *)(sp + 0)); 482 v1 = _mm256_loadu_si256((const void *)(sp + 32)); 483 s0 = _mm256_get_mask(v0, cq); 484 s1 = _mm256_get_mask(v1, cq); 485 t0 = _mm256_get_mask(v0, cx); 486 t1 = _mm256_get_mask(v1, cx); 487 c0 = _mm256_cchars_mask(v0); 488 c1 = _mm256_cchars_mask(v1); 489 m0 = ((uint64_t)s1 << 32) | (uint64_t)s0; 490 m1 = ((uint64_t)t1 << 32) | (uint64_t)t0; 491 m2 = ((uint64_t)c1 << 32) | (uint64_t)c0; 492 #else 493 v0 = _mm_loadu_si128((const void *)(sp + 0)); 494 v1 = _mm_loadu_si128((const void *)(sp + 16)); 495 v2 = _mm_loadu_si128((const void *)(sp + 32)); 496 v3 = _mm_loadu_si128((const void *)(sp + 48)); 497 s0 = _mm_get_mask(v0, cq); 498 s1 = _mm_get_mask(v1, cq); 499 s2 = _mm_get_mask(v2, cq); 500 s3 = _mm_get_mask(v3, cq); 501 t0 = _mm_get_mask(v0, cx); 502 t1 = _mm_get_mask(v1, cx); 503 t2 = _mm_get_mask(v2, cx); 504 t3 = _mm_get_mask(v3, cx); 505 c0 
= _mm_cchars_mask(v0); 506 c1 = _mm_cchars_mask(v1); 507 c2 = _mm_cchars_mask(v2); 508 c3 = _mm_cchars_mask(v3); 509 m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0; 510 m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0; 511 m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0; 512 513 #endif 514 515 /** update first quote position */ 516 if (unlikely(m1 != 0)) 517 { 518 ep_setx(sp - ss + __builtin_ctzll(m1)) 519 } 520 521 /** mask all the escaped quotes */ 522 if (unlikely(m1 != 0 || cr != 0)) 523 { 524 m0_mask(add64) 525 } 526 527 /* get the position of end quote */ 528 if (m0 != 0) 529 { 530 qp = sp - ss + __builtin_ctzll(m0) + 1; 531 /* check control chars in JSON string */ 532 if (unlikely(m2 != 0 && (np = sp - ss + __builtin_ctzll(m2)) < qp)) 533 { 534 ep_setx(np) // set error position 535 return -ERR_INVAL; 536 } 537 return qp; 538 } 539 540 /* check control chars in JSON string */ 541 if (unlikely(m2 != 0)) 542 { 543 ep_setx(sp - ss + __builtin_ctzll(m2)) return -ERR_INVAL; 544 } 545 546 /* move to the next block */ 547 sp += 64; 548 nb -= 64; 549 } 550 551 /* 32-byte SIMD round */ 552 if (likely(nb >= 32)) 553 { 554 #if USE_AVX2 555 v0 = _mm256_loadu_si256((const void *)sp); 556 s0 = _mm256_get_mask(v0, cq); 557 t0 = _mm256_get_mask(v0, cx); 558 c0 = _mm256_cchars_mask(v0); 559 m0 = (uint64_t)s0; 560 m1 = (uint64_t)t0; 561 m2 = (uint64_t)c0; 562 #else 563 v0 = _mm_loadu_si128((const void *)(sp + 0)); 564 v1 = _mm_loadu_si128((const void *)(sp + 16)); 565 s0 = _mm_get_mask(v0, cq); 566 s1 = _mm_get_mask(v1, cq); 567 t0 = _mm_get_mask(v0, cx); 568 t1 = _mm_get_mask(v1, cx); 569 c0 = _mm_cchars_mask(v0); 570 c1 = _mm_cchars_mask(v1); 571 m0 = ((uint64_t)s1 << 16) | (uint64_t)s0; 572 m1 = ((uint64_t)t1 << 16) | (uint64_t)t0; 573 m2 = ((uint64_t)c1 << 16) | (uint64_t)c0; 574 #endif 575 576 /** update first quote position */ 577 if (unlikely(m1 != 0)) 
578 { 579 ep_setx(sp - ss + __builtin_ctzll(m1)) 580 } 581 582 /** mask all the escaped quotes */ 583 if (unlikely(m1 != 0 || cr != 0)) 584 { 585 m0_mask(add32) 586 } 587 588 /* get the position of end quote */ 589 if (m0 != 0) 590 { 591 qp = sp - ss + __builtin_ctzll(m0) + 1; 592 /* check control chars in JSON string */ 593 if (unlikely(m2 != 0 && (np = sp - ss + __builtin_ctzll(m2)) < qp)) 594 { 595 ep_setx(np) // set error position 596 return -ERR_INVAL; 597 } 598 return qp; 599 } 600 601 /* check control chars in JSON string */ 602 if (unlikely(m2 != 0)) 603 { 604 ep_setx(sp - ss + __builtin_ctzll(m2)) return -ERR_INVAL; 605 } 606 607 /* move to the next block */ 608 sp += 32; 609 nb -= 32; 610 } 611 612 /* check for carry */ 613 if (unlikely(cr != 0)) 614 { 615 if (nb == 0) 616 { 617 return -ERR_EOF; 618 } 619 else 620 { 621 ep_setc() 622 sp++, 623 nb--; 624 } 625 } 626 627 /* handle the remaining bytes with scalar code */ 628 while (nb-- > 0 && (ch = *sp++) != '"') 629 { 630 if (unlikely(ch == '\\')) 631 { 632 if (nb == 0) 633 { 634 return -ERR_EOF; 635 } 636 else 637 { 638 ep_setc() 639 sp++, 640 nb--; 641 } 642 } 643 else if (unlikely(ch >= 0 && ch <= 0x1f)) 644 { // control chars 645 ep_setc() return -ERR_INVAL; 646 } 647 } 648 649 #undef ep_init 650 #undef ep_setc 651 #undef ep_setx 652 #undef m0_mask 653 654 /* check for quotes */ 655 if (ch == '"') 656 { 657 return sp - ss; 658 } 659 else 660 { 661 return -ERR_EOF; 662 } 663 } 664 665 /** Value Scanning Routines **/ 666 667 long value(const char *s, size_t n, long p, JsonState *ret, int allow_control) 668 { 669 long q = p; 670 GoString m = {.buf = s, .len = n}; 671 672 /* parse the next identifier, q is UNSAFE, may cause out-of-bounds accessing */ 673 switch (advance_ns(&m, &q)) 674 { 675 case '-': /* fallthrough */ 676 case '0': /* fallthrough */ 677 case '1': /* fallthrough */ 678 case '2': /* fallthrough */ 679 case '3': /* fallthrough */ 680 case '4': /* fallthrough */ 681 case '5': /* fallthrough 
*/ 682 case '6': /* fallthrough */ 683 case '7': /* fallthrough */ 684 case '8': /* fallthrough */ 685 case '9': 686 vdigits(&m, &q, ret); 687 return q; 688 case '"': 689 vstring(&m, &q, ret); 690 return q; 691 case 'n': 692 ret->vt = advance_dword(&m, &q, 1, V_NULL, VS_NULL); 693 return q; 694 case 't': 695 ret->vt = advance_dword(&m, &q, 1, V_TRUE, VS_TRUE); 696 return q; 697 case 'f': 698 ret->vt = advance_dword(&m, &q, 0, V_FALSE, VS_ALSE); 699 return q; 700 case '[': 701 ret->vt = V_ARRAY; 702 return q; 703 case '{': 704 ret->vt = V_OBJECT; 705 return q; 706 case ':': 707 ret->vt = allow_control ? V_KEY_SEP : -ERR_INVAL; 708 return allow_control ? q : q - 1; 709 case ',': 710 ret->vt = allow_control ? V_ELEM_SEP : -ERR_INVAL; 711 return allow_control ? q : q - 1; 712 case ']': 713 ret->vt = allow_control ? V_ARRAY_END : -ERR_INVAL; 714 return allow_control ? q : q - 1; 715 case '}': 716 ret->vt = allow_control ? V_OBJECT_END : -ERR_INVAL; 717 return allow_control ? q : q - 1; 718 case 0: 719 ret->vt = V_EOF; 720 return q; 721 default: 722 ret->vt = -ERR_INVAL; 723 return q - 1; 724 } 725 } 726 727 void vstring(const GoString *src, long *p, JsonState *ret) 728 { 729 int64_t v = -1; 730 int64_t i = *p; 731 ssize_t e = advance_string(src, i, &v); 732 733 /* check for errors */ 734 if (e < 0) 735 { 736 *p = src->len; 737 ret->vt = e; 738 return; 739 } 740 741 /* update the result, and fix the escape position (escaping past the end of string) */ 742 *p = e; 743 ret->iv = i; 744 ret->vt = V_STRING; 745 ret->ep = v >= e ? 
-1 : v; 746 } 747 748 #define set_vt(t) \ 749 ret->vt = t; 750 751 #define init_ret(t) \ 752 ret->vt = t; \ 753 ret->dv = 0.0; \ 754 ret->iv = 0; \ 755 ret->ep = *p; 756 757 #define check_eof() \ 758 if (i >= n) \ 759 { \ 760 *p = n; \ 761 ret->vt = -ERR_EOF; \ 762 return; \ 763 } 764 765 #define check_sign(on_neg) \ 766 if (s[i] == '-') \ 767 { \ 768 i++; \ 769 on_neg; \ 770 check_eof() \ 771 } 772 773 #define check_digit() \ 774 if (s[i] < '0' || s[i] > '9') \ 775 { \ 776 *p = i; \ 777 ret->vt = -ERR_INVAL; \ 778 return; \ 779 } 780 781 #define check_leading_zero() \ 782 if (s[i] == '0' && (i >= n || (s[i + 1] != '.' && s[i + 1] != 'e' && s[i + 1] != 'E'))) \ 783 { \ 784 *p = ++i; \ 785 return; \ 786 } 787 788 #define parse_sign(sgn) \ 789 if (s[i] == '+' || s[i] == '-') \ 790 { \ 791 sgn = s[i++] == '+' ? 1 : -1; \ 792 check_eof() \ 793 } 794 795 #define is_digit(val) \ 796 '0' <= val &&val <= '9' 797 798 #define add_integer_to_mantissa(man, man_nd, exp10, dig) \ 799 if (man_nd < 19) \ 800 { \ 801 man = man * 10 + dig; \ 802 man_nd++; \ 803 } \ 804 else \ 805 { \ 806 exp10++; \ 807 } 808 809 #define add_float_to_mantissa(man, man_nd, exp10, dig) \ 810 man = man * 10 + dig; \ 811 man_nd++; \ 812 exp10--; 813 814 #define parse_float_digits(val, sgn, ...) 
\ 815 while (i < n && s[i] >= '0' && s[i] <= '9' __VA_ARGS__) \ 816 { \ 817 val *= 10; \ 818 val += sgn * (s[i++] - '0'); \ 819 } 820 821 #define parse_integer_digits(val, sgn, ovf) \ 822 while (i < n && s[i] >= '0' && s[i] <= '9') \ 823 { \ 824 if (add_digit_overflow(val, sgn * (s[i++] - '0'))) \ 825 { \ 826 ovf = 1; \ 827 break; \ 828 } \ 829 } 830 831 #define add_digit_overflow(val, chr) ( \ 832 __builtin_mul_overflow(val, 10, &val) || \ 833 __builtin_add_overflow(val, chr, &val)) 834 835 #define vinteger(type, sgn, on_neg) \ 836 int ovf = 0; \ 837 type val = 0; \ 838 \ 839 /* initial buffer pointers */ \ 840 long i = *p; \ 841 size_t n = src->len; \ 842 const char *s = src->buf; \ 843 \ 844 /* initialize the result, and check for '-' */ \ 845 init_ret(V_INTEGER) \ 846 check_eof() \ 847 check_sign(on_neg) \ 848 \ 849 /* check for leading zero or any digits */ \ 850 check_digit() \ 851 check_leading_zero() \ 852 parse_integer_digits(val, sgn, ovf) \ 853 \ 854 /* check for overflow */ \ 855 if (ovf) \ 856 { \ 857 *p = i - 1; \ 858 ret->vt = -ERR_OVERFLOW; \ 859 return; \ 860 } \ 861 \ 862 /* check for the decimal part */ \ 863 if (i < n && s[i] == '.') \ 864 { \ 865 *p = i; \ 866 ret->vt = -ERR_NUMBER_FMT; \ 867 return; \ 868 } \ 869 \ 870 /* check for the exponent part */ \ 871 if (i < n && (s[i] == 'e' || s[i] == 'E')) \ 872 { \ 873 *p = i; \ 874 ret->vt = -ERR_NUMBER_FMT; \ 875 return; \ 876 } \ 877 \ 878 /* update the result */ \ 879 *p = i; \ 880 ret->iv = val; 881 882 /** check whether float can represent the val exactly **/ 883 static inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val) 884 { 885 *val = (double)man; 886 887 if (man >> 52 != 0) 888 { 889 return false; 890 } 891 892 /* equal to if (sgn == -1) { *val *= -1; } */ 893 *(uint64_t *)val |= ((uint64_t)(sgn) >> 63 << 63); 894 895 if (exp == 0 || man == 0) 896 { 897 return true; 898 } 899 else if (exp > 0 && exp <= 15 + 22) 900 { 901 /* uint64 integers: accurate range <= 10^15 * 902 
* Powers of 10: accurate range <= 10^22, as P10_TAB * 903 * Example: man 1, exp 36, is ok */ 904 if (exp > 22) 905 { 906 *val *= P10_TAB[exp - 22]; 907 exp = 22; 908 } 909 910 /* f is not accurate when too larger */ 911 if (*val > 1e15 || *val < -1e15) 912 { 913 return false; 914 } 915 916 *val *= P10_TAB[exp]; 917 return true; 918 } 919 else if (exp < 0 && exp >= -22) 920 { 921 *val /= P10_TAB[-exp]; 922 return true; 923 } 924 925 return false; 926 } 927 928 static inline double atof_fast(uint64_t man, int exp, int sgn, int trunc, double *val) 929 { 930 double val_up = 0.0; 931 932 /* look-up for fast atof if the conversion can be exactly */ 933 if (is_atof_exact(man, exp, sgn, val)) 934 { 935 return true; 936 } 937 938 /* A fast atof algorithm for high percison */ 939 if (atof_eisel_lemire64(man, exp, sgn, val)) 940 { 941 if (!trunc || (atof_eisel_lemire64(man + 1, exp, sgn, &val_up) && val_up == *val)) 942 { 943 return true; 944 } 945 } 946 947 return false; 948 } 949 950 static bool inline is_overflow(uint64_t man, int sgn, int exp10) 951 { 952 /* the former exp10 != 0 means man has overflowed 953 * the later euqals to man*sgn < INT64_MIN or > INT64_MAX */ 954 return exp10 != 0 || 955 ((man >> 63) == 1 && ((uint64_t)sgn & man) != (1ull << 63)); 956 } 957 958 void vnumber(const GoString *src, long *p, JsonState *ret) 959 { 960 int sgn = 1; 961 uint64_t man = 0; // mantissa for double (float64) 962 int man_nd = 0; // # digits of mantissa, 10 ^ 19 fits uint64_t 963 int exp10 = 0; // val = sgn * man * 10 ^ exp10 964 int trunc = 0; 965 double val = 0; 966 967 /* initial buffer pointers */ 968 long i = *p; 969 size_t n = src->len; 970 const char *s = src->buf; 971 char *dbuf = ret->dbuf; 972 ssize_t dcap = ret->dcap; 973 974 /* initialize the result, and check for EOF */ 975 init_ret(V_INTEGER) 976 check_eof() 977 check_sign(sgn = -1) 978 979 /* check for leading zero */ 980 check_digit() 981 check_leading_zero() 982 983 /* parse the integer part */ 984 while (i < n 
&& is_digit(s[i])) 985 { 986 add_integer_to_mantissa(man, man_nd, exp10, (s[i] - '0')) 987 i++; 988 } 989 990 if (exp10 > 0) 991 { 992 trunc = 1; 993 } 994 995 /* check for decimal points */ 996 if (i < n && s[i] == '.') 997 { 998 i++; 999 set_vt(V_DOUBLE) 1000 check_eof() 1001 check_digit() 1002 } 1003 1004 /* skip the leading zeros of 0.000xxxx */ 1005 if (man == 0 && exp10 == 0) 1006 { 1007 while (i < n && s[i] == '0') 1008 { 1009 i++; 1010 exp10--; 1011 } 1012 man = 0; 1013 man_nd = 0; 1014 } 1015 1016 /* the fractional part (uint64_t mantissa can represent at most 19 digits) */ 1017 while (i < n && man_nd < 19 && is_digit(s[i])) 1018 { 1019 add_float_to_mantissa(man, man_nd, exp10, (s[i] - '0')) 1020 i++; 1021 } 1022 1023 /* skip the remaining digits */ 1024 while (i < n && is_digit(s[i])) 1025 { 1026 trunc = 1; 1027 i++; 1028 } 1029 1030 /* check for exponent */ 1031 if (i < n && (s[i] == 'e' || s[i] == 'E')) 1032 { 1033 int esm = 1; 1034 int exp = 0; 1035 1036 /* check for the '+' or '-' sign, and parse the power */ 1037 i++; 1038 set_vt(V_DOUBLE) 1039 check_eof() 1040 parse_sign(esm) 1041 check_digit() while (i < n && is_digit(s[i])) 1042 { 1043 if (exp < 10000) 1044 { 1045 exp = exp * 10 + (s[i] - '0'); 1046 } 1047 i++; 1048 } 1049 exp10 += exp * esm; 1050 goto parse_float; 1051 } 1052 1053 if (ret->vt == V_INTEGER) 1054 { 1055 if (!is_overflow(man, sgn, exp10)) 1056 { 1057 ret->iv = (int64_t)man * sgn; 1058 /* following lines equal to ret->dv = (double)(man) * sgn */ 1059 ret->dv = (double)(man); 1060 *(uint64_t *)&ret->dv |= ((uint64_t)(sgn) >> 63 << 63); 1061 *p = i; 1062 return; 1063 } 1064 set_vt(V_DOUBLE) 1065 } 1066 1067 parse_float: 1068 /* when fast algorithms failed, use slow fallback.*/ 1069 if (!atof_fast(man, exp10, sgn, trunc, &val)) 1070 { 1071 val = atof_native(s + *p, i - *p, dbuf, dcap); 1072 } 1073 1074 /* check parsed double val */ 1075 if (is_infinity(val)) 1076 { 1077 ret->vt = -ERR_FLOAT_INF; 1078 } 1079 1080 /* update the result */ 
    ret->dv = val;
    *p = i;
}

/* Parses a signed 64-bit integer at offset *p via the vinteger() macro body.
 * A leading '-' flips `sgn` to -1 so digits are accumulated negatively. */
void vsigned(const GoString *src, long *p, JsonState *ret)
{
    int64_t sgn = 1;
    vinteger(int64_t, sgn, sgn = -1)
}

/* Parses an unsigned 64-bit integer at offset *p via the vinteger() macro
 * body. A leading '-' is rejected: *p is set to the position of the '-'
 * and ret->vt to -ERR_NUMBER_FMT. */
void vunsigned(const GoString *src, long *p, JsonState *ret)
{
    vinteger(uint64_t, 1, {
        *p = i - 1;
        ret->vt = -ERR_NUMBER_FMT;
        return;
    })
}

/* NOTE(review): `set_vt` and `check_sign` are defined with the macros below
 * but are not in this #undef list — confirm whether that is intentional. */
#undef init_ret
#undef check_eof
#undef check_digit
#undef check_leading_zero
#undef parse_sign
#undef is_digit
#undef add_integer_to_mantissa
#undef add_float_to_mantissa
#undef parse_float_digits
#undef parse_integer_digits
#undef add_digit_overflow
#undef vinteger

/** Value Skipping FSM **/

/* Disabled function forms of the FSM helpers; the code below uses the
 * upper-case FSM_* names instead. */

// static inline void FSM_INIT(StateMachine *self, int vt)
// {
//     self->sp = 1;
//     self->vt[0] = vt;
// }

// static inline long fsm_push(StateMachine *self, int vt)
// {
//     if (self->sp >= MAX_RECURSE)
//     {
//         return -ERR_RECURSE_MAX;
//     }
//     else
//     {
//         self->vt[self->sp++] = vt;
//         return 0;
//     }
// }

/* Runs the value-skipping state machine until its stack (self->sp) is empty.
 *
 * Each iteration reads the next non-space character and dispatches on the
 * state at the top of the stack. String tokens are consumed with
 * skip_string() or validate_string() depending on validate_flag.
 *
 * Returns the offset of the first non-space character consumed (the start of
 * the skipped value), or a negated ERR_* code on failure. */
static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, int validate_flag)
{
    int vt;
    char ch;
    long vi = -1;

    /* run until no more nested values */
    while (self->sp)
    {
        ch = advance_ns(src, p);
        vt = self->vt[self->sp - 1];

        /* set the start address if any */
        if (vi == -1)
        {
            vi = *p - 1;
        }

        /* check for special types */
        switch (vt)
        {
        /* any other state: pop it and treat `ch` as the start of a value */
        default:
        {
            FSM_DROP(self);
            break;
        }

        /* arrays */
        case FSM_ARR:
        {
            switch (ch)
            {
            case ']':
                FSM_DROP(self);
                continue;
            case ',':
                FSM_PUSH(self, FSM_VAL);
                continue;
            default:
                return -ERR_INVAL;
            }
        }

        /* objects */
        case FSM_OBJ:
        {
            switch (ch)
            {
            case '}':
FSM_DROP(self); 1184 continue; 1185 case ',': 1186 FSM_PUSH(self, FSM_KEY); 1187 continue; 1188 default: 1189 return -ERR_INVAL; 1190 } 1191 } 1192 1193 /* object keys */ 1194 case FSM_KEY: 1195 { 1196 FSM_CHAR('"'); 1197 FSM_REPL(self, FSM_ELEM); 1198 FSM_XERR(skip_string(src, p)); 1199 continue; 1200 } 1201 1202 /* object element */ 1203 case FSM_ELEM: 1204 { 1205 FSM_CHAR(':'); 1206 FSM_REPL(self, FSM_VAL); 1207 continue; 1208 } 1209 1210 /* arrays, first element */ 1211 case FSM_ARR_0: 1212 { 1213 if (ch == ']') 1214 { 1215 FSM_DROP(self); 1216 continue; 1217 } 1218 else 1219 { 1220 FSM_REPL(self, FSM_ARR); 1221 break; 1222 } 1223 } 1224 1225 /* objects, first pair */ 1226 case FSM_OBJ_0: 1227 { 1228 switch (ch) 1229 { 1230 default: 1231 { 1232 return -ERR_INVAL; 1233 } 1234 1235 /* empty object */ 1236 case '}': 1237 { 1238 FSM_DROP(self); 1239 continue; 1240 } 1241 1242 /* the quote of the first key */ 1243 case '"': 1244 { 1245 FSM_REPL(self, FSM_OBJ); 1246 if (validate_flag == VALID_DEFAULT) 1247 { 1248 FSM_XERR(skip_string(src, p)); 1249 } 1250 else if (validate_flag == VALID_FULL) 1251 { 1252 FSM_XERR(validate_string(src, p)); 1253 } 1254 FSM_PUSH(self, FSM_ELEM); 1255 continue; 1256 } 1257 } 1258 } 1259 } 1260 1261 /* simple values */ 1262 switch (ch) 1263 { 1264 case '0': /* fallthrough */ 1265 case '1': /* fallthrough */ 1266 case '2': /* fallthrough */ 1267 case '3': /* fallthrough */ 1268 case '4': /* fallthrough */ 1269 case '5': /* fallthrough */ 1270 case '6': /* fallthrough */ 1271 case '7': /* fallthrough */ 1272 case '8': /* fallthrough */ 1273 case '9': 1274 FSM_XERR(skip_positive(src, p)); 1275 break; 1276 case '-': 1277 FSM_XERR(skip_negative(src, p)); 1278 break; 1279 case 'n': 1280 FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_NULL)); 1281 break; 1282 case 't': 1283 FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_TRUE)); 1284 break; 1285 case 'f': 1286 FSM_XERR(advance_dword(src, p, 0, *p - 1, VS_ALSE)); 1287 break; 1288 case '[': 1289 
FSM_PUSH(self, FSM_ARR_0); 1290 break; 1291 case '{': 1292 FSM_PUSH(self, FSM_OBJ_0); 1293 break; 1294 case '"': 1295 { 1296 if (validate_flag == VALID_DEFAULT) 1297 { 1298 FSM_XERR(skip_string(src, p)); 1299 } 1300 else if (validate_flag == VALID_FULL) 1301 { 1302 FSM_XERR(validate_string(src, p)); 1303 } 1304 break; 1305 } 1306 case 0: 1307 return -ERR_EOF; 1308 default: 1309 return -ERR_INVAL; 1310 } 1311 } 1312 1313 /* all done */ 1314 return vi; 1315 } 1316 1317 long skip_number(const char *sp, size_t nb) 1318 { 1319 long di = -1; 1320 long ei = -1; 1321 long si = -1; 1322 const char *ss = sp; 1323 1324 /* check for EOF */ 1325 if (nb == 0) 1326 { 1327 return -1; 1328 } 1329 1330 /* special case of '0' */ 1331 if (*sp == '0' && (nb == 1 || (sp[1] != '.' && sp[1] != 'e' && sp[1] != 'E'))) 1332 { 1333 return 1; 1334 } 1335 1336 #if USE_AVX2 1337 /* can do with AVX-2 */ 1338 if (likely(nb >= 32)) 1339 { 1340 __m256i d9 = _mm256_set1_epi8('9'); 1341 __m256i ds = _mm256_set1_epi8('/'); 1342 __m256i dp = _mm256_set1_epi8('.'); 1343 __m256i el = _mm256_set1_epi8('e'); 1344 __m256i eu = _mm256_set1_epi8('E'); 1345 __m256i xp = _mm256_set1_epi8('+'); 1346 __m256i xm = _mm256_set1_epi8('-'); 1347 1348 /* 32-byte loop */ 1349 do 1350 { 1351 __m256i sb = _mm256_loadu_si256((const void *)sp); 1352 __m256i i0 = _mm256_cmpgt_epi8(sb, ds); 1353 __m256i i9 = _mm256_cmpgt_epi8(sb, d9); 1354 __m256i id = _mm256_cmpeq_epi8(sb, dp); 1355 __m256i il = _mm256_cmpeq_epi8(sb, el); 1356 __m256i iu = _mm256_cmpeq_epi8(sb, eu); 1357 __m256i ip = _mm256_cmpeq_epi8(sb, xp); 1358 __m256i im = _mm256_cmpeq_epi8(sb, xm); 1359 __m256i iv = _mm256_andnot_si256(i9, i0); 1360 __m256i ie = _mm256_or_si256(il, iu); 1361 __m256i is = _mm256_or_si256(ip, im); 1362 __m256i rt = _mm256_or_si256(iv, id); 1363 __m256i ru = _mm256_or_si256(ie, is); 1364 __m256i rv = _mm256_or_si256(rt, ru); 1365 1366 /* exponent and sign position */ 1367 uint32_t md = _mm256_movemask_epi8(id); 1368 uint32_t me = 
_mm256_movemask_epi8(ie); 1369 uint32_t ms = _mm256_movemask_epi8(is); 1370 uint32_t mr = _mm256_movemask_epi8(rv); 1371 1372 /* mismatch position */ 1373 uint32_t v; 1374 uint32_t i = __builtin_ctzll(~(uint64_t)mr | 0x0100000000); 1375 1376 /* mask out excess characters */ 1377 if (i != 32) 1378 { 1379 md &= (1 << i) - 1; 1380 me &= (1 << i) - 1; 1381 ms &= (1 << i) - 1; 1382 } 1383 1384 /* check & update decimal point, exponent and sign index */ 1385 check_bits(md) 1386 check_bits(me) 1387 check_bits(ms) 1388 check_vidx(di, md) 1389 check_vidx(ei, me) 1390 check_vidx(si, ms) 1391 1392 /* check for valid number */ 1393 if (i != 32) 1394 { 1395 sp += i; 1396 _mm256_zeroupper(); 1397 goto check_index; 1398 } 1399 1400 /* move to next block */ 1401 sp += 32; 1402 nb -= 32; 1403 } while (nb >= 32); 1404 1405 /* clear the upper half to prevent AVX-SSE transition penalty */ 1406 _mm256_zeroupper(); 1407 } 1408 #endif 1409 1410 /* can do with SSE */ 1411 if (likely(nb >= 16)) 1412 { 1413 __m128i dc = _mm_set1_epi8(':'); 1414 __m128i ds = _mm_set1_epi8('/'); 1415 __m128i dp = _mm_set1_epi8('.'); 1416 __m128i el = _mm_set1_epi8('e'); 1417 __m128i eu = _mm_set1_epi8('E'); 1418 __m128i xp = _mm_set1_epi8('+'); 1419 __m128i xm = _mm_set1_epi8('-'); 1420 1421 /* 16-byte loop */ 1422 do 1423 { 1424 __m128i sb = _mm_loadu_si128((const void *)sp); 1425 __m128i i0 = _mm_cmpgt_epi8(sb, ds); 1426 __m128i i9 = _mm_cmplt_epi8(sb, dc); 1427 __m128i id = _mm_cmpeq_epi8(sb, dp); 1428 __m128i il = _mm_cmpeq_epi8(sb, el); 1429 __m128i iu = _mm_cmpeq_epi8(sb, eu); 1430 __m128i ip = _mm_cmpeq_epi8(sb, xp); 1431 __m128i im = _mm_cmpeq_epi8(sb, xm); 1432 __m128i iv = _mm_and_si128(i9, i0); 1433 __m128i ie = _mm_or_si128(il, iu); 1434 __m128i is = _mm_or_si128(ip, im); 1435 __m128i rt = _mm_or_si128(iv, id); 1436 __m128i ru = _mm_or_si128(ie, is); 1437 __m128i rv = _mm_or_si128(rt, ru); 1438 1439 /* exponent and sign position */ 1440 uint32_t md = _mm_movemask_epi8(id); 1441 uint32_t me = 
_mm_movemask_epi8(ie); 1442 uint32_t ms = _mm_movemask_epi8(is); 1443 uint32_t mr = _mm_movemask_epi8(rv); 1444 1445 /* mismatch position */ 1446 uint32_t v; 1447 uint32_t i = __builtin_ctzll(~mr | 0x00010000); 1448 1449 /* mask out excess characters */ 1450 if (i != 16) 1451 { 1452 md &= (1 << i) - 1; 1453 me &= (1 << i) - 1; 1454 ms &= (1 << i) - 1; 1455 } 1456 1457 /* check & update exponent and sign index */ 1458 check_bits(md) 1459 check_bits(me) 1460 check_bits(ms) 1461 check_vidx(di, md) 1462 check_vidx(ei, me) 1463 check_vidx(si, ms) 1464 1465 /* check for valid number */ 1466 if (i != 16) 1467 { 1468 sp += i; 1469 goto check_index; 1470 } 1471 1472 /* move to next block */ 1473 sp += 16; 1474 nb -= 16; 1475 } while (nb >= 16); 1476 } 1477 1478 /* remaining bytes, do with scalar code */ 1479 while (likely(nb-- > 0)) 1480 { 1481 switch (*sp++) 1482 { 1483 case '0': /* fallthrough */ 1484 case '1': /* fallthrough */ 1485 case '2': /* fallthrough */ 1486 case '3': /* fallthrough */ 1487 case '4': /* fallthrough */ 1488 case '5': /* fallthrough */ 1489 case '6': /* fallthrough */ 1490 case '7': /* fallthrough */ 1491 case '8': /* fallthrough */ 1492 case '9': 1493 break; 1494 case '.': 1495 check_sidx(di); 1496 break; 1497 case 'e': /* fallthrough */ 1498 case 'E': 1499 check_sidx(ei); 1500 break; 1501 case '+': /* fallthrough */ 1502 case '-': 1503 check_sidx(si); 1504 break; 1505 default: 1506 sp--; 1507 goto check_index; 1508 } 1509 } 1510 check_index: 1511 if (di == 0 || si == 0 || ei == 0) 1512 { 1513 return -1; 1514 } 1515 else if (di == sp - ss - 1 || si == sp - ss - 1 || ei == sp - ss - 1) 1516 { 1517 return -(sp - ss); 1518 } 1519 else if (si > 0 && ei != si - 1) 1520 { 1521 return -si - 1; 1522 } 1523 else if (di >= 0 && ei >= 0 && di > ei - 1) 1524 { 1525 return -di - 1; 1526 } 1527 else if (di >= 0 && ei >= 0 && di == ei - 1) 1528 { 1529 return -ei - 1; 1530 } 1531 else 1532 { 1533 return sp - ss; 1534 } 1535 } 1536 1537 long skip_one(const GoString 
*src, long *p, StateMachine *m) 1538 { 1539 FSM_INIT(m, FSM_VAL); 1540 return fsm_exec(m, src, p, VALID_DEFAULT); 1541 } 1542 1543 long skip_array(const GoString *src, long *p, StateMachine *m) 1544 { 1545 FSM_INIT(m, FSM_ARR_0); 1546 return fsm_exec(m, src, p, VALID_DEFAULT); 1547 } 1548 1549 long skip_object(const GoString *src, long *p, StateMachine *m) 1550 { 1551 FSM_INIT(m, FSM_OBJ_0); 1552 return fsm_exec(m, src, p, VALID_DEFAULT); 1553 } 1554 1555 long skip_string(const GoString *src, long *p) 1556 { 1557 int64_t v; 1558 ssize_t q = *p - 1; 1559 ssize_t e = advance_string(src, *p, &v); 1560 1561 /* check for errors, and update the position */ 1562 if (e >= 0) 1563 { 1564 *p = e; 1565 return q; 1566 } 1567 else 1568 { 1569 *p = src->len; 1570 return e; 1571 } 1572 } 1573 1574 long validate_string(const GoString *src, long *p) 1575 { 1576 int64_t v; 1577 ssize_t q = *p - 1; 1578 ssize_t e = advance_validate_string(src, *p, &v); 1579 1580 /* check for errors in string advance */ 1581 if (e < 0) 1582 { 1583 *p = e == -ERR_EOF ? 
src->len : v; 1584 return e; 1585 } 1586 1587 /* check for errors in UTF-8 validate */ 1588 ssize_t nb = e - *p - 1; 1589 ssize_t r = utf8_validate(src->buf + *p, nb); 1590 if (r >= 0) 1591 { 1592 *p += r; 1593 return -ERR_INVAL; 1594 } 1595 *p = e; 1596 return q; 1597 } 1598 1599 long skip_negative(const GoString *src, long *p) 1600 { 1601 long i = *p; 1602 long r = skip_number(src->buf + i, src->len - i); 1603 1604 /* check for errors */ 1605 if (r < 0) 1606 { 1607 *p -= r + 1; 1608 return -ERR_INVAL; 1609 } 1610 1611 /* update value pointer */ 1612 *p += r; 1613 return i - 1; 1614 } 1615 1616 long skip_positive(const GoString *src, long *p) 1617 { 1618 long i = *p - 1; 1619 long r = skip_number(src->buf + i, src->len - i); 1620 1621 /* check for errors */ 1622 if (r < 0) 1623 { 1624 *p -= r + 2; 1625 return -ERR_INVAL; 1626 } 1627 1628 /* update value pointer */ 1629 *p += r - 1; 1630 return i; 1631 } 1632 1633 long validate_one(const GoString *src, long *p, StateMachine *m) 1634 { 1635 FSM_INIT(m, FSM_VAL); 1636 return fsm_exec(m, src, p, VALID_FULL); 1637 }