github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/native/scanning.c (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "native.h"
    18  #include "utf8.h"
    19  #include "utils.h"
    20  
    21  static const uint64_t ODD_MASK  = 0xaaaaaaaaaaaaaaaa;
    22  static const uint64_t EVEN_MASK = 0x5555555555555555;
    23  
    24  // NOTE: mask referenced from decoder/decoder.go
    25  static const uint64_t MASK_VALIDATE_STRING = 1ull << 5;
    26  static const uint64_t MASK_ALLOW_CONTROL   = 1ull << 31;
    27  
    28  static const double P10_TAB[23] = {
    29      /* <= the connvertion to double is not exact when less than 1 => */     1e-000,
    30      1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010,
    31      1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020,
    32      1e+021, 1e+022 /* <= the connvertion to double is not exact when larger,  => */
    33  };
    34  
    35  static inline uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo) {
    36      uint32_t v;
    37      uint32_t c = __builtin_uadd_overflow((uint32_t)v1, (uint32_t)v2, &v);
    38  
    39      /* set the carry */
    40      *vo = c;
    41      return v;
    42  }
    43  
    44  static inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) {
    45      unsigned long long v;
    46      uint64_t c = __builtin_uaddll_overflow(v1, v2, &v);
    47  
    48      /* set the carry */
    49      *vo = c;
    50      return v;
    51  }
    52  
    53  static inline char isspace(char ch) {
    54      return ch == ' ' || ch == '\r' || ch == '\n' | ch == '\t';
    55  }
    56  
    57  const int MASK_USE_NUMBER = 1<<1;
    58  
    59  static inline void vdigits(const GoString *src, long *p, JsonState *ret, uint64_t flag) {
    60      --*p;
    61      if (flag & MASK_USE_NUMBER) {
    62          long i = skip_number(src, p);
    63          if (i < 0) {
    64              ret->vt = i;
    65              return;
    66          }
    67          ret->vt = V_DOUBLE;
    68          ret->ep = i;
    69          return;
    70      }
    71      vnumber(src, p, ret);
    72  }
    73  
    74  static inline char advance_ns(const GoString *src, long *p) {
    75      size_t       vi = *p;
    76      size_t       nb = src->len;
    77      const char * sp = src->buf;
    78  
    79      /* it's likely to run into non-spaces within a few
    80       * characters, so test up to 4 characters manually */
    81      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    82      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    83      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    84      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    85  
    86      /* check EOF */
    87      if (vi >= nb) {
    88          *p = vi;
    89          return 0;
    90      }
    91  
    92      /* too many spaces, use SIMD to search for characters */
    93      if ((vi = lspace(sp, nb, vi)) >= nb) {
    94          return 0;
    95      }
    96  
    97  nospace:
    98      *p = vi + 1;
    99      return src->buf[vi];
   100  }
   101  
   102  static inline int64_t advance_dword(const GoString *src, long *p, long dec, int64_t ret, uint32_t val) {
   103      if (*p > src->len + dec - 4) {
   104          *p = src->len;
   105          return -ERR_EOF;
   106      } else if (*(uint32_t *)(src->buf + *p - dec) == val) {
   107          *p += 4 - dec;
   108          return ret;
   109      } else {
   110          *p -= dec;
   111          for (int i = 0; src->buf[*p] == (val & 0xff) && i < 4; i++, ++*p) { val >>= 8; }
   112          return -ERR_INVAL;
   113      }
   114  }
   115  
   116  static inline ssize_t advance_string_default(const GoString *src, long p, int64_t *ep) {
   117      char     ch;
   118      uint64_t es;
   119      uint64_t fe;
   120      uint64_t os;
   121      uint64_t m0;
   122      uint64_t m1;
   123      uint64_t cr = 0;
   124  
   125      /* prevent out-of-bounds accessing */
   126      if (unlikely(src->len == p)) {
   127          return -ERR_EOF;
   128      }
   129  
   130      /* buffer pointers */
   131      size_t       nb = src->len;
   132      const char * sp = src->buf;
   133      const char * ss = src->buf;
   134  
   135  #define ep_init()   *ep = -1;
   136  #define ep_setc()   ep_setx(sp - ss - 1)
   137  #define ep_setx(x)  if (*ep == -1) { *ep = (x); }
   138  
   139      /* seek to `p` */
   140      nb -= p;
   141      sp += p;
   142      ep_init()
   143  
   144  #if USE_AVX2
   145      /* initialize vectors */
   146      __m256i v0;
   147      __m256i v1;
   148      __m256i q0;
   149      __m256i q1;
   150      __m256i x0;
   151      __m256i x1;
   152      __m256i cq = _mm256_set1_epi8('"');
   153      __m256i cx = _mm256_set1_epi8('\\');
   154  
   155      /* partial masks */
   156      uint32_t s0;
   157      uint32_t s1;
   158      uint32_t t0;
   159      uint32_t t1;
   160  #else
   161      /* initialize vectors */
   162      __m128i v0;
   163      __m128i v1;
   164      __m128i v2;
   165      __m128i v3;
   166      __m128i q0;
   167      __m128i q1;
   168      __m128i q2;
   169      __m128i q3;
   170      __m128i x0;
   171      __m128i x1;
   172      __m128i x2;
   173      __m128i x3;
   174      __m128i cq = _mm_set1_epi8('"');
   175      __m128i cx = _mm_set1_epi8('\\');
   176  
   177      /* partial masks */
   178      uint32_t s0;
   179      uint32_t s1;
   180      uint32_t s2;
   181      uint32_t s3;
   182      uint32_t t0;
   183      uint32_t t1;
   184      uint32_t t2;
   185      uint32_t t3;
   186  #endif
   187  
   188  #define m0_mask(add)                \
   189      m1 &= ~cr;                      \
   190      fe  = (m1 << 1) | cr;           \
   191      os  = (m1 & ~fe) & ODD_MASK;    \
   192      es  = add(os, m1, &cr) << 1;    \
   193      m0 &= ~(fe & (es ^ EVEN_MASK));
   194  
   195      /* 64-byte SIMD loop */
   196      while (likely(nb >= 64)) {
   197  #if USE_AVX2
   198          v0 = _mm256_loadu_si256   ((const void *)(sp +  0));
   199          v1 = _mm256_loadu_si256   ((const void *)(sp + 32));
   200          q0 = _mm256_cmpeq_epi8    (v0, cq);
   201          q1 = _mm256_cmpeq_epi8    (v1, cq);
   202          x0 = _mm256_cmpeq_epi8    (v0, cx);
   203          x1 = _mm256_cmpeq_epi8    (v1, cx);
   204          s0 = _mm256_movemask_epi8 (q0);
   205          s1 = _mm256_movemask_epi8 (q1);
   206          t0 = _mm256_movemask_epi8 (x0);
   207          t1 = _mm256_movemask_epi8 (x1);
   208          m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
   209          m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
   210  #else
   211          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   212          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   213          v2 = _mm_loadu_si128   ((const void *)(sp + 32));
   214          v3 = _mm_loadu_si128   ((const void *)(sp + 48));
   215          q0 = _mm_cmpeq_epi8    (v0, cq);
   216          q1 = _mm_cmpeq_epi8    (v1, cq);
   217          q2 = _mm_cmpeq_epi8    (v2, cq);
   218          q3 = _mm_cmpeq_epi8    (v3, cq);
   219          x0 = _mm_cmpeq_epi8    (v0, cx);
   220          x1 = _mm_cmpeq_epi8    (v1, cx);
   221          x2 = _mm_cmpeq_epi8    (v2, cx);
   222          x3 = _mm_cmpeq_epi8    (v3, cx);
   223          s0 = _mm_movemask_epi8 (q0);
   224          s1 = _mm_movemask_epi8 (q1);
   225          s2 = _mm_movemask_epi8 (q2);
   226          s3 = _mm_movemask_epi8 (q3);
   227          t0 = _mm_movemask_epi8 (x0);
   228          t1 = _mm_movemask_epi8 (x1);
   229          t2 = _mm_movemask_epi8 (x2);
   230          t3 = _mm_movemask_epi8 (x3);
   231          m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
   232          m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
   233  #endif
   234          /** update first quote position */
   235          if (unlikely(m1 != 0)) {
   236              ep_setx(sp - ss + __builtin_ctzll(m1))
   237          }
   238  
   239          /** mask all the escaped quotes */
   240          if (unlikely(m1 != 0 || cr != 0)) {
   241              m0_mask(add64)
   242          }
   243         
   244          /* check for end quote */
   245          if (m0 != 0) {
   246              return sp - ss + __builtin_ctzll(m0) + 1;
   247          }
   248  
   249          /* move to the next block */
   250          sp += 64;
   251          nb -= 64;
   252      }
   253  
   254      /* 32-byte SIMD round */
   255      if (likely(nb >= 32)) {
   256  #if USE_AVX2
   257          v0 = _mm256_loadu_si256   ((const void *)sp);
   258          q0 = _mm256_cmpeq_epi8    (v0, cq);
   259          x0 = _mm256_cmpeq_epi8    (v0, cx);
   260          s0 = _mm256_movemask_epi8 (q0);
   261          t0 = _mm256_movemask_epi8 (x0);
   262          m0 = (uint64_t)s0;
   263          m1 = (uint64_t)t0;
   264  #else
   265          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   266          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   267          q0 = _mm_cmpeq_epi8    (v0, cq);
   268          q1 = _mm_cmpeq_epi8    (v1, cq);
   269          x0 = _mm_cmpeq_epi8    (v0, cx);
   270          x1 = _mm_cmpeq_epi8    (v1, cx);
   271          s0 = _mm_movemask_epi8 (q0);
   272          s1 = _mm_movemask_epi8 (q1);
   273          t0 = _mm_movemask_epi8 (x0);
   274          t1 = _mm_movemask_epi8 (x1);
   275          m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
   276          m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
   277  #endif
   278         
   279          /** update first quote position */
   280          if (unlikely(m1 != 0)) {
   281              ep_setx(sp - ss + __builtin_ctzll(m1))
   282          }
   283  
   284          /** mask all the escaped quotes */
   285          if (unlikely(m1 != 0 || cr != 0)) {
   286              m0_mask(add32)
   287          }
   288         
   289          /* check for end quote */
   290          if (m0 != 0) {
   291              return sp - ss + __builtin_ctzll(m0) + 1;
   292          }
   293  
   294          /* move to the next block */
   295          sp += 32;
   296          nb -= 32;
   297      }
   298  
   299      /* check for carry */
   300      if (unlikely(cr != 0)) {
   301          if (nb == 0) {
   302              return -ERR_EOF;
   303          } else {
   304              ep_setc()
   305              sp++, nb--;
   306          }
   307      }
   308  
   309      /* handle the remaining bytes with scalar code */
   310      while (nb-- > 0 && (ch = *sp++) != '"') {
   311          if (unlikely(ch == '\\')) {
   312              if (nb == 0) {
   313                  return -ERR_EOF;
   314              } else {
   315                  ep_setc()
   316                  sp++, nb--;
   317              }
   318          }
   319      }
   320  
   321  #undef ep_init
   322  #undef ep_setc
   323  #undef ep_setx
   324  #undef m0_mask
   325  
   326      /* check for quotes */
   327      if (ch == '"') {
   328          return sp - ss;
   329      } else {
   330          return -ERR_EOF;
   331      }
   332  }
   333  
   334  #if USE_AVX2
   335  
   336  static inline int _mm256_get_mask(__m256i v, __m256i t) {
   337      return _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, t));
   338  }
   339  
   340  // contrl char: 0x00 ~ 0x1F
   341  static inline int _mm256_cchars_mask(__m256i v) {
   342      __m256i e1 = _mm256_cmpgt_epi8 (v, _mm256_set1_epi8(-1));
   343      __m256i e2 = _mm256_cmpgt_epi8 (v, _mm256_set1_epi8(31));
   344      return    _mm256_movemask_epi8 (_mm256_andnot_si256 (e2, e1));
   345  }
   346  
   347  // ascii: 0x00 ~ 0x7F
   348  static inline int _mm256_nonascii_mask(__m256i v) {
   349      return _mm256_movemask_epi8(v);
   350  }
   351  
   352  #endif
   353  
   354  static inline int _mm_get_mask(__m128i v, __m128i t) {
   355      return _mm_movemask_epi8(_mm_cmpeq_epi8(v, t));
   356  }
   357  
   358  // contrl char: 0x00 ~ 0x1F
   359  static inline int _mm_cchars_mask(__m128i v) {
   360      __m128i e1 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(-1));
   361      __m128i e2 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(31));
   362      return    _mm_movemask_epi8 (_mm_andnot_si128 (e2, e1));
   363  }
   364  
   365  // ascii: 0x00 ~ 0x7F
   366  static inline int _mm_nonascii_mask(__m128i v) {
   367      return _mm_movemask_epi8(v);
   368  }
   369  
   370  static inline ssize_t advance_string_validate(const GoString *src, long p, int64_t *ep) {
   371      char     ch;
   372      uint64_t m0, m1, m2;
   373      uint64_t es, fe, os;
   374      uint64_t cr = 0;
   375      long     qp = 0;
   376      long     np = 0;
   377  
   378      /* buffer pointers */
   379      size_t       nb = src->len;
   380      const char * sp = src->buf;
   381      const char * ss = src->buf;
   382  
   383      /* prevent out-of-bounds accessing */
   384      if (unlikely(nb == p)) {
   385          return -ERR_EOF;
   386      }
   387  
   388  #define ep_init()    *ep = -1;
   389  #define ep_setc()    ep_setx(sp - ss - 1)
   390  #define ep_setx(x)   if (*ep == -1) { *ep = (x); }
   391  #define ep_seterr(x)  *ep = (x);
   392  
   393      /* seek to `p` */
   394      nb -= p;
   395      sp += p;
   396      ep_init()
   397  
   398  #if USE_AVX2
   399      /* initialize vectors */
   400      __m256i v0;
   401      __m256i v1;
   402      __m256i cq = _mm256_set1_epi8('"');
   403      __m256i cx = _mm256_set1_epi8('\\');
   404  
   405      /* partial masks */
   406      uint32_t s0, s1;
   407      uint32_t t0, t1;
   408      uint32_t c0, c1;
   409  #else
   410      /* initialize vectors */
   411      __m128i v0;
   412      __m128i v1;
   413      __m128i v2;
   414      __m128i v3;
   415      __m128i cq = _mm_set1_epi8('"');
   416      __m128i cx = _mm_set1_epi8('\\');
   417  
   418      /* partial masks */
   419      uint32_t s0, s1, s2, s3;
   420      uint32_t t0, t1, t2, t3;
   421      uint32_t c0, c1, c2, c3;
   422  #endif
   423  
   424  #define m0_mask(add)                \
   425      m1 &= ~cr;                      \
   426      fe  = (m1 << 1) | cr;           \
   427      os  = (m1 & ~fe) & ODD_MASK;    \
   428      es  = add(os, m1, &cr) << 1;    \
   429      m0 &= ~(fe & (es ^ EVEN_MASK));
   430  
   431      /* 64-byte SIMD loop */
   432      while (likely(nb >= 64)) {
   433  #if USE_AVX2
   434          v0 = _mm256_loadu_si256   ((const void *)(sp +  0));
   435          v1 = _mm256_loadu_si256   ((const void *)(sp + 32));
   436          s0 = _mm256_get_mask(v0, cq);
   437          s1 = _mm256_get_mask(v1, cq);
   438          t0 = _mm256_get_mask(v0, cx);
   439          t1 = _mm256_get_mask(v1, cx);
   440          c0 = _mm256_cchars_mask(v0);
   441          c1 = _mm256_cchars_mask(v1);
   442          m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
   443          m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
   444          m2 = ((uint64_t)c1 << 32) | (uint64_t)c0;
   445  #else
   446          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   447          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   448          v2 = _mm_loadu_si128   ((const void *)(sp + 32));
   449          v3 = _mm_loadu_si128   ((const void *)(sp + 48));
   450          s0 = _mm_get_mask(v0, cq);
   451          s1 = _mm_get_mask(v1, cq);
   452          s2 = _mm_get_mask(v2, cq);
   453          s3 = _mm_get_mask(v3, cq);
   454          t0 = _mm_get_mask(v0, cx);
   455          t1 = _mm_get_mask(v1, cx);
   456          t2 = _mm_get_mask(v2, cx);
   457          t3 = _mm_get_mask(v3, cx);
   458          c0 = _mm_cchars_mask(v0);
   459          c1 = _mm_cchars_mask(v1);
   460          c2 = _mm_cchars_mask(v2);
   461          c3 = _mm_cchars_mask(v3);
   462          m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
   463          m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
   464          m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0;
   465  
   466  #endif
   467         
   468          /** update first quote position */
   469          if (unlikely(m1 != 0)) {
   470              ep_setx(sp - ss + __builtin_ctzll(m1))
   471          }
   472  
   473          /** mask all the escaped quotes */
   474          if (unlikely(m1 != 0 || cr != 0)) {
   475              m0_mask(add64)
   476          }
   477  
   478          qp = m0 ? __builtin_ctzll(m0) : 64;
   479          np = m2 ? __builtin_ctzll(m2) : 64;
   480         
   481          /* get the position of end quote */
   482          if (m0 != 0) {
   483              /* check control chars in JSON string */
   484              if (unlikely(np < qp)) {
   485                  ep_seterr(sp - ss + np)
   486                 
   487                  return -ERR_INVAL;
   488              }
   489              return sp - ss + qp + 1;
   490          }
   491  
   492          /* check control chars in JSON string */
   493          if (unlikely(m2 != 0)) {
   494              ep_setx(sp - ss + np)
   495             
   496              return -ERR_INVAL;
   497          }
   498  
   499          /* move to the next block */
   500          sp += 64;
   501          nb -= 64;
   502      }
   503  
   504      /* 32-byte SIMD round */
   505      if (likely(nb >= 32)) {
   506  #if USE_AVX2
   507          v0 = _mm256_loadu_si256   ((const void *)sp);
   508          s0 = _mm256_get_mask (v0, cq);
   509          t0 = _mm256_get_mask (v0, cx);
   510          c0 = _mm256_cchars_mask(v0);
   511          m0 = (uint64_t)s0;
   512          m1 = (uint64_t)t0;
   513          m2 = (uint64_t)c0;
   514  #else
   515          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   516          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   517          s0 = _mm_get_mask(v0, cq);
   518          s1 = _mm_get_mask(v1, cq);
   519          t0 = _mm_get_mask(v0, cx);
   520          t1 = _mm_get_mask(v1, cx);
   521          c0 = _mm_cchars_mask(v0);
   522          c1 = _mm_cchars_mask(v1);
   523          m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
   524          m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
   525          m2 = ((uint64_t)c1 << 16) | (uint64_t)c0;
   526  #endif
   527         
   528          /** update first quote position */
   529          if (unlikely(m1 != 0)) {
   530              ep_setx(sp - ss + __builtin_ctzll(m1))
   531          }
   532  
   533          /** mask all the escaped quotes */
   534          if (unlikely(m1 != 0 || cr != 0)) {
   535              m0_mask(add32)
   536          }
   537         
   538          qp = m0 ? __builtin_ctzll(m0) : 64;
   539          np = m2 ? __builtin_ctzll(m2) : 64;
   540         
   541          /* get the position of end quote */
   542          if (m0 != 0) {
   543              if (unlikely(np < qp)) {
   544                  ep_seterr(sp - ss + np)
   545                  return -ERR_INVAL;
   546              }
   547              return sp - ss + qp + 1;
   548          }
   549  
   550          /* check control chars in JSON string */
   551          if (unlikely(m2 != 0)) {
   552              ep_seterr(sp - ss + __builtin_ctzll(m2))
   553              return -ERR_INVAL;
   554          }
   555  
   556          /* move to the next block */
   557          sp += 32;
   558          nb -= 32;
   559      }
   560  
   561      /* check for carry */
   562      if (unlikely(cr != 0)) {
   563          if (nb == 0) {
   564              return -ERR_EOF;
   565          } else {
   566              ep_setc()
   567              sp++, nb--;
   568          }
   569      }
   570  
   571      /* handle the remaining bytes with scalar code */
   572      while (nb > 0) {
   573          ch = *sp;
   574          if (ch == '"') {
   575             
   576              return sp - ss + 1;
   577          }
   578  
   579          /* valid the escaped chars */
   580          if (unlikely(ch == '\\')) {
   581              if (nb == 1) {
   582                  return -ERR_EOF;
   583              }
   584              ep_setx(sp - ss)
   585              sp += 2, nb -= 2;
   586              continue;
   587          }
   588  
   589          /* valid unescaped chars */
   590          if (unlikely( ch >= 0 && ch <= 0x1f)) { // control chars
   591              ep_seterr(sp - ss)
   592              return -ERR_INVAL;
   593          }
   594  
   595          sp++, nb--;
   596      }
   597      return -ERR_EOF;
   598  #undef ep_init
   599  #undef ep_setc
   600  #undef ep_setx
   601  #undef ep_seterr
   602  #undef m0_mask
   603  }
   604  
   605  static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep, uint64_t flags) {
   606      if ((flags & MASK_VALIDATE_STRING) != 0) {
   607          return advance_string_validate(src, p, ep);
   608      } else {
   609          return advance_string_default(src, p, ep);
   610      }
   611  }
   612  
   613  /** Value Scanning Routines **/
   614  
   615  long value(const char *s, size_t n, long p, JsonState *ret, uint64_t flags) {
   616      long     q = p;
   617      GoString m = {.buf = s, .len = n};
   618      bool allow_control = (flags & MASK_ALLOW_CONTROL) != 0;
   619      /* parse the next identifier, q is UNSAFE, may cause out-of-bounds accessing */
   620      switch (advance_ns(&m, &q)) {
   621          case '-' : /* fallthrough */
   622          case '0' : /* fallthrough */
   623          case '1' : /* fallthrough */
   624          case '2' : /* fallthrough */
   625          case '3' : /* fallthrough */
   626          case '4' : /* fallthrough */
   627          case '5' : /* fallthrough */
   628          case '6' : /* fallthrough */
   629          case '7' : /* fallthrough */
   630          case '8' : /* fallthrough */
   631          case '9' : vdigits(&m, &q, ret, flags)                          ; return q;
   632          case '"' : vstring(&m, &q, ret, flags)                          ; return q;
   633          case 'n' : ret->vt = advance_dword(&m, &q, 1, V_NULL, VS_NULL)  ; return q;
   634          case 't' : ret->vt = advance_dword(&m, &q, 1, V_TRUE, VS_TRUE)  ; return q;
   635          case 'f' : ret->vt = advance_dword(&m, &q, 0, V_FALSE, VS_ALSE) ; return q;
   636          case '[' : ret->vt = V_ARRAY                                    ; return q;
   637          case '{' : ret->vt = V_OBJECT                                   ; return q;
   638          case ':' : ret->vt = allow_control ? V_KEY_SEP : -ERR_INVAL     ; return allow_control ? q : q - 1;
   639          case ',' : ret->vt = allow_control ? V_ELEM_SEP : -ERR_INVAL    ; return allow_control ? q : q - 1;
   640          case ']' : ret->vt = allow_control ? V_ARRAY_END : -ERR_INVAL   ; return allow_control ? q : q - 1;
   641          case '}' : ret->vt = allow_control ? V_OBJECT_END : -ERR_INVAL  ; return allow_control ? q : q - 1;
   642          case  0  : ret->vt = V_EOF                                      ; return q;
   643          default  : ret->vt = -ERR_INVAL                                 ; return q - 1;
   644      }
   645  }
   646  
   647  void vstring(const GoString *src, long *p, JsonState *ret, uint64_t flags) {
   648      int64_t v = -1;
   649      int64_t i = *p;
   650      ssize_t e = advance_string(src, i, &v, flags);
   651     
   652      /* check for errors */
   653      if (e < 0) {
   654          *p = src->len;
   655          ret->vt = e;
   656          return;
   657      }
   658  
   659      /* update the result, and fix the escape position (escaping past the end of string) */
   660      *p = e;
   661      ret->iv = i;
   662      ret->vt = V_STRING;
   663      ret->ep = v >= e ? -1 : v;
   664  }
   665  
   666  #define set_vt(t)   \
   667      ret->vt = t;
   668  
   669  #define init_ret(t) \
   670      ret->vt = t;    \
   671      ret->dv = 0.0;  \
   672      ret->iv = 0;    \
   673      ret->ep = *p;
   674  
   675  #define check_eof()         \
   676      if (i >= n) {           \
   677          *p = n;             \
   678          ret->vt = -ERR_EOF; \
   679          return;             \
   680      }
   681  
   682  #define check_sign(on_neg)  \
   683      if (s[i] == '-') {      \
   684          i++;                \
   685          on_neg;             \
   686          check_eof()         \
   687      }
   688  
   689  #define check_digit()               \
   690      if (s[i] < '0' || s[i] > '9') { \
   691          *p = i;                     \
   692          ret->vt = -ERR_INVAL;       \
   693          return;                     \
   694      }
   695  
   696  #define check_leading_zero()                                                                    \
   697      if (s[i] == '0' && (i >= n || (s[i + 1] != '.' && s[i + 1] != 'e' && s[i + 1] != 'E'))) {   \
   698          *p = ++i;                                                                               \
   699          return;                                                                                 \
   700      }
   701  
   702  #define parse_sign(sgn)                 \
   703      if (s[i] == '+' || s[i] == '-') {   \
   704          sgn = s[i++] == '+' ? 1 : -1;   \
   705          check_eof()                     \
   706      }
   707  
   708  #define is_digit(val) \
   709      '0' <= val && val <= '9'
   710  
   711  #define add_integer_to_mantissa(man, man_nd, exp10, dig) \
   712      if (man_nd < 19) {                                   \
   713          man = man * 10 + dig;                            \
   714          man_nd++;                                        \
   715      } else {                                             \
   716          exp10++;                                         \
   717      }
   718  
   719  #define add_float_to_mantissa(man, man_nd, exp10, dig) \
   720      man = man * 10 + dig;                              \
   721      man_nd++;                                          \
   722      exp10--;
   723  
   724  #define parse_float_digits(val, sgn, ...)                       \
   725      while (i < n && s[i] >= '0' && s[i] <= '9' __VA_ARGS__) {   \
   726          val *= 10;                                              \
   727          val += sgn * (s[i++] - '0');                            \
   728      }
   729  
   730  #define parse_integer_digits(val, sgn, ovf)                     \
   731      while (i < n && s[i] >= '0' && s[i] <= '9') {               \
   732          if (add_digit_overflow(val, sgn * (s[i++] - '0'))) {    \
   733              ovf = 1;                                            \
   734              break;                                              \
   735          }                                                       \
   736      }
   737  
   738  #define add_digit_overflow(val, chr) (          \
   739      __builtin_mul_overflow(val, 10, &val) ||    \
   740      __builtin_add_overflow(val, chr, &val)      \
   741  )
   742  
   743  #define vinteger(type, sgn, on_neg)                     \
   744      int  ovf = 0;                                       \
   745      type val = 0;                                       \
   746                                                          \
   747      /* initial buffer pointers */                       \
   748      long         i = *p;                                \
   749      size_t       n = src->len;                          \
   750      const char * s = src->buf;                          \
   751                                                          \
   752      /* initialize the result, and check for '-' */      \
   753      init_ret(V_INTEGER)                                 \
   754      check_eof()                                         \
   755      check_sign(on_neg)                                  \
   756                                                          \
   757      /* check for leading zero or any digits */          \
   758      check_digit()                                       \
   759      check_leading_zero()                                \
   760      parse_integer_digits(val, sgn, ovf)                 \
   761                                                          \
   762      /* check for overflow */                            \
   763      if (ovf) {                                          \
   764          *p = i - 1;                                     \
   765          ret->vt = -ERR_OVERFLOW;                        \
   766          return;                                         \
   767      }                                                   \
   768                                                          \
   769      /* check for the decimal part */                    \
   770      if (i < n && s[i] == '.') {                         \
   771          *p = i;                                         \
   772          ret->vt = -ERR_NUMBER_FMT;                      \
   773          return;                                         \
   774      }                                                   \
   775                                                          \
   776      /* check for the exponent part */                   \
   777      if (i < n && (s[i] == 'e' || s[i] == 'E')) {        \
   778          *p = i;                                         \
   779          ret->vt = -ERR_NUMBER_FMT;                      \
   780          return;                                         \
   781      }                                                   \
   782                                                          \
   783      /* update the result */                             \
   784      *p = i;                                             \
   785      ret->iv = val;
   786  
   787  /** check whether float can represent the val exactly **/
   788  static inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
   789      *val = (double)man;
   790  
   791      if (man >> 52 != 0) {
   792          return false;
   793      }
   794  
   795      /* equal to if (sgn == -1) { *val *= -1; } */
   796      *(uint64_t *)val |= ((uint64_t)(sgn) >> 63 << 63);
   797  
   798      if (exp == 0 || man == 0) {
   799          return true;
   800      } else if (exp > 0 && exp <= 15+22) {
   801          /* uint64 integers: accurate range <= 10^15          *
   802           * Powers of 10: accurate range <= 10^22, as P10_TAB *
   803           * Example: man 1, exp 36, is ok                     */
   804          if (exp > 22) {
   805              *val *= P10_TAB[exp-22];
   806              exp = 22;
   807          }
   808  
   809          /* f is not accurate when too larger */
   810          if (*val > 1e15 || *val < -1e15) {
   811              return false;
   812          }
   813  
   814          *val *= P10_TAB[exp];
   815          return true;
   816      } else if (exp < 0 && exp >= -22) {
   817          *val /=  P10_TAB[-exp];
   818          return true;
   819      }
   820  
   821      return false;
   822  }
   823  
   824  static inline double atof_fast(uint64_t man, int exp, int sgn, int trunc, double *val) {
   825      double val_up = 0.0;
   826  
   827      /* look-up for fast atof if the conversion can be exactly */
   828      if (is_atof_exact(man, exp, sgn, val)) {
   829          return true;
   830      }
   831  
   832      /* A fast atof algorithm for high percison */
   833      if (atof_eisel_lemire64(man, exp, sgn, val)) {
   834          if (!trunc || (atof_eisel_lemire64(man+1, exp, sgn, &val_up) && val_up == *val)) {
   835              return true;
   836          }
   837      }
   838  
   839      return false;
   840  }
   841  
   842  static bool inline is_overflow(uint64_t man, int sgn, int exp10) {
   843      /* the former exp10 != 0 means man has overflowed
   844       * the later euqals to man*sgn < INT64_MIN or > INT64_MAX */
   845      return exp10 != 0 ||
   846          ((man >> 63) == 1 && ((uint64_t)sgn & man) != (1ull << 63));
   847  }
   848  
   849  void vnumber(const GoString *src, long *p, JsonState *ret) {
   850      int      sgn = 1;
   851      uint64_t man = 0; // mantissa for double (float64)
   852      int   man_nd = 0; // # digits of mantissa, 10 ^ 19 fits uint64_t
   853      int    exp10 = 0; // val = sgn * man * 10 ^ exp10
   854      int    trunc = 0;
   855      double   val = 0;
   856  
   857      /* initial buffer pointers */
   858      long         i = *p;
   859      size_t       n = src->len;
   860      const char * s = src->buf;
   861      char     *dbuf = ret->dbuf;
   862      ssize_t   dcap = ret->dcap;
   863  
   864      /* initialize the result, and check for EOF */
   865      init_ret(V_INTEGER)
   866      check_eof()
   867      check_sign(sgn = -1)
   868  
   869      /* check for leading zero */
   870      check_digit()
   871      check_leading_zero()
   872  
   873      /* parse the integer part */
   874      while (i < n && is_digit(s[i])) {
   875          add_integer_to_mantissa(man, man_nd, exp10, (s[i] - '0'))
   876          i++;
   877      }
   878  
   879      if (exp10 > 0) {
   880          trunc = 1;
   881      }
   882  
   883      /* check for decimal points */
   884      if (i < n && s[i] == '.') {
   885          i++;
   886          set_vt(V_DOUBLE)
   887          check_eof()
   888          check_digit()
   889      }
   890  
   891      /* skip the leading zeros of 0.000xxxx */
   892      if (man == 0 && exp10 == 0) {
   893          while (i < n && s[i] == '0') {
   894              i++;
   895              exp10--;
   896          }
   897          man = 0;
   898          man_nd = 0;
   899      }
   900  
   901      /* the fractional part (uint64_t mantissa can represent at most 19 digits) */
   902      while (i < n && man_nd < 19 && is_digit(s[i])) {
   903          add_float_to_mantissa(man, man_nd, exp10, (s[i] - '0'))
   904          i++;
   905      }
   906  
   907       /* skip the remaining digits */
   908      while (i < n && is_digit(s[i])) {
   909          trunc = 1;
   910          i++;
   911      }
   912  
   913      /* check for exponent */
   914      if (i < n && (s[i] == 'e' || s[i] == 'E')) {
   915          int esm = 1;
   916          int exp = 0;
   917  
   918          /* check for the '+' or '-' sign, and parse the power */
   919          i++;
   920          set_vt(V_DOUBLE)
   921          check_eof()
   922          parse_sign(esm)
   923          check_digit()
   924          while (i < n && is_digit(s[i])) {
   925              if (exp < 10000) {
   926                  exp = exp * 10 + (s[i] - '0');
   927              }
   928              i++;
   929          }
   930          exp10 += exp * esm;
   931          goto parse_float;
   932      }
   933  
   934      if (ret->vt == V_INTEGER) {
   935          if (!is_overflow(man, sgn, exp10)) {
   936              ret->iv = (int64_t)man * sgn;
   937  
   938              /* following lines equal to ret->dv = (double)(man) * sgn */
   939              ret->dv = (double)(man);
   940              *(uint64_t *)&ret->dv |= ((uint64_t)(sgn) >> 63 << 63);
   941  
   942              *p = i;
   943              return;
   944          }
   945          set_vt(V_DOUBLE)
   946      }
   947  
   948  parse_float:
   949      /* when fast algorithms failed, use slow fallback.*/
   950      if(!atof_fast(man, exp10, sgn, trunc, &val)) {
   951          val = atof_native(s + *p, i - *p, dbuf, dcap);
   952      }
   953  
   954      /* check parsed double val */
   955      if (is_infinity(val)) {
   956          ret->vt = -ERR_FLOAT_INF;
   957      }
   958  
   959      /* update the result */
   960      ret->dv = val;
   961      *p = i;
   962  }
   963  
   964  void vsigned(const GoString *src, long *p, JsonState *ret) {
   965      int64_t sgn = 1;
   966      vinteger(int64_t, sgn, sgn = -1)
   967  }
   968  
   969  void vunsigned(const GoString *src, long *p, JsonState *ret) {
   970      vinteger(uint64_t, 1, {
   971          *p = i - 1;
   972          ret->vt = -ERR_NUMBER_FMT;
   973          return;
   974      })
   975  }
   976  
   977  #undef init_ret
   978  #undef check_eof
   979  #undef check_digit
   980  #undef check_leading_zero
   981  #undef parse_sign
   982  #undef is_digit
   983  #undef add_integer_to_mantissa
   984  #undef add_float_to_mantissa
   985  #undef parse_float_digits
   986  #undef parse_integer_digits
   987  #undef add_digit_overflow
   988  #undef vinteger
   989  
   990  /** Value Skipping FSM **/
   991  
   992  #define FSM_VAL         0
   993  #define FSM_ARR         1
   994  #define FSM_OBJ         2
   995  #define FSM_KEY         3
   996  #define FSM_ELEM        4
   997  #define FSM_ARR_0       5
   998  #define FSM_OBJ_0       6
   999  
  1000  #define FSM_DROP(v)     (v)->sp--
  1001  #define FSM_REPL(v, t)  (v)->vt[(v)->sp - 1] = (t)
  1002  
  1003  #define FSM_CHAR(c)     do { if (ch != (c)) return -ERR_INVAL; } while (0)
  1004  #define FSM_XERR(v)     do { long r = (v); if (r < 0) return r; } while (0)
  1005  
  1006  static inline void fsm_init(StateMachine *self, int vt) {
  1007      self->sp = 1;
  1008      self->vt[0] = vt;
  1009  }
  1010  
  1011  static inline long fsm_push(StateMachine *self, int vt) {
  1012      if (self->sp >= MAX_RECURSE) {
  1013          return -ERR_RECURSE_MAX;
  1014      } else {
  1015          self->vt[self->sp++] = vt;
  1016          return 0;
  1017      }
  1018  }
  1019  
  1020  static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, uint64_t flags) {
  1021      int  vt;
  1022      char ch;
  1023      long vi = -1;
  1024  
  1025      /* run until no more nested values */
  1026      while (self->sp) {
  1027          ch = advance_ns(src, p);
  1028          if (ch  == 0) {
  1029              return -ERR_EOF;
  1030          }
  1031          vt = self->vt[self->sp - 1];
  1032  
  1033          /* set the start address if any */
  1034          if (vi == -1) {
  1035              vi = *p - 1;
  1036          }
  1037  
  1038          /* check for special types */
  1039          switch (vt) {
  1040              default: {
  1041                  FSM_DROP(self);
  1042                  break;
  1043              }
  1044  
  1045              /* arrays */
  1046              case FSM_ARR: {
  1047                  switch (ch) {
  1048                      case ']' : FSM_DROP(self);                    continue;
  1049                      case ',' : FSM_XERR(fsm_push(self, FSM_VAL)); continue;
  1050                      default  : return -ERR_INVAL;
  1051                  }
  1052              }
  1053  
  1054              /* objects */
  1055              case FSM_OBJ: {
  1056                  switch (ch) {
  1057                      case '}' : FSM_DROP(self);                    continue;
  1058                      case ',' : FSM_XERR(fsm_push(self, FSM_KEY)); continue;
  1059                      default  : return -ERR_INVAL;
  1060                  }
  1061              }
  1062  
  1063              /* object keys */
  1064              case FSM_KEY: {
  1065                  FSM_CHAR('"');
  1066                  FSM_REPL(self, FSM_ELEM);
  1067                  FSM_XERR(skip_string(src, p, flags));
  1068                  continue;
  1069              }
  1070  
  1071              /* object element */
  1072              case FSM_ELEM: {
  1073                  FSM_CHAR(':');
  1074                  FSM_REPL(self, FSM_VAL);
  1075                  continue;
  1076              }
  1077  
  1078              /* arrays, first element */
  1079              case FSM_ARR_0: {
  1080                  if (ch == ']') {
  1081                      FSM_DROP(self);
  1082                      continue;
  1083                  } else {
  1084                      FSM_REPL(self, FSM_ARR);
  1085                      break;
  1086                  }
  1087              }
  1088  
  1089              /* objects, first pair */
  1090              case FSM_OBJ_0: {
  1091                  switch (ch) {
  1092                      default: {
  1093                          return -ERR_INVAL;
  1094                      }
  1095  
  1096                      /* empty object */
  1097                      case '}': {
  1098                          FSM_DROP(self);
  1099                          continue;
  1100                      }
  1101  
  1102                      /* the quote of the first key */
  1103                      case '"': {
  1104                          FSM_REPL(self, FSM_OBJ);
  1105                          FSM_XERR(skip_string(src, p, flags));
  1106                          FSM_XERR(fsm_push(self, FSM_ELEM));
  1107                          continue;
  1108                      }
  1109                  }
  1110              }
  1111          }
  1112  
  1113          /* simple values */
  1114          switch (ch) {
  1115              case '0' : /* fallthrough */
  1116              case '1' : /* fallthrough */
  1117              case '2' : /* fallthrough */
  1118              case '3' : /* fallthrough */
  1119              case '4' : /* fallthrough */
  1120              case '5' : /* fallthrough */
  1121              case '6' : /* fallthrough */
  1122              case '7' : /* fallthrough */
  1123              case '8' : /* fallthrough */
  1124              case '9' : FSM_XERR(skip_positive(src, p));                     break;
  1125              case '-' : FSM_XERR(skip_negative(src, p));                     break;
  1126              case 'n' : FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_NULL)); break;
  1127              case 't' : FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_TRUE)); break;
  1128              case 'f' : FSM_XERR(advance_dword(src, p, 0, *p - 1, VS_ALSE)); break;
  1129              case '[' : FSM_XERR(fsm_push(self, FSM_ARR_0));                 break;
  1130              case '{' : FSM_XERR(fsm_push(self, FSM_OBJ_0));                 break;
  1131              case '"' : FSM_XERR(skip_string(src, p, flags));                break;
  1132              case  0  : return -ERR_EOF;
  1133              default  : return -ERR_INVAL;
  1134          }
  1135      }
  1136  
  1137      /* all done */
  1138      return vi;
  1139  }
  1140  
  1141  #undef FSM_DROP
  1142  #undef FSM_REPL
  1143  #undef FSM_CHAR
  1144  #undef FSM_XERR
  1145  
  1146  #define check_bits(mv)                              \
  1147      if (unlikely((v = mv & (mv - 1)) != 0)) {       \
  1148          return -(sp - ss + __builtin_ctz(v) + 1);   \
  1149      }
  1150  
  1151  #define check_sidx(iv)          \
  1152      if (likely(iv == -1)) {     \
  1153          iv = sp - ss - 1;       \
  1154      } else {                    \
  1155          return -(sp - ss);      \
  1156      }
  1157  
  1158  #define check_vidx(iv, mv)                              \
  1159      if (mv != 0) {                                      \
  1160          if (likely(iv == -1)) {                         \
  1161              iv = sp - ss + __builtin_ctz(mv);           \
  1162          } else {                                        \
  1163              return -(sp - ss + __builtin_ctz(mv) + 1);  \
  1164          }                                               \
  1165      }
  1166  
  1167  static inline long do_skip_number(const char *sp, size_t nb) {
  1168      long         di = -1;
  1169      long         ei = -1;
  1170      long         si = -1;
  1171      const char * ss = sp;
  1172  
  1173      /* check for EOF */
  1174      if (nb == 0) {
  1175          return -1;
  1176      }
  1177  
  1178      /* special case of '0' */
  1179      if (*sp == '0' && (nb == 1 || (sp[1] != '.' && sp[1] != 'e' && sp[1] != 'E'))) {
  1180          return 1;
  1181      }
  1182  
  1183  #if USE_AVX2
  1184      /* can do with AVX-2 */
  1185      if (likely(nb >= 32)) {
  1186          __m256i d9 = _mm256_set1_epi8('9');
  1187          __m256i ds = _mm256_set1_epi8('/');
  1188          __m256i dp = _mm256_set1_epi8('.');
  1189          __m256i el = _mm256_set1_epi8('e');
  1190          __m256i eu = _mm256_set1_epi8('E');
  1191          __m256i xp = _mm256_set1_epi8('+');
  1192          __m256i xm = _mm256_set1_epi8('-');
  1193  
  1194          /* 32-byte loop */
  1195          do {
  1196              __m256i sb = _mm256_loadu_si256  ((const void *)sp);
  1197              __m256i i0 = _mm256_cmpgt_epi8   (sb, ds);
  1198              __m256i i9 = _mm256_cmpgt_epi8   (sb, d9);
  1199              __m256i id = _mm256_cmpeq_epi8   (sb, dp);
  1200              __m256i il = _mm256_cmpeq_epi8   (sb, el);
  1201              __m256i iu = _mm256_cmpeq_epi8   (sb, eu);
  1202              __m256i ip = _mm256_cmpeq_epi8   (sb, xp);
  1203              __m256i im = _mm256_cmpeq_epi8   (sb, xm);
  1204              __m256i iv = _mm256_andnot_si256 (i9, i0);
  1205              __m256i ie = _mm256_or_si256     (il, iu);
  1206              __m256i is = _mm256_or_si256     (ip, im);
  1207              __m256i rt = _mm256_or_si256     (iv, id);
  1208              __m256i ru = _mm256_or_si256     (ie, is);
  1209              __m256i rv = _mm256_or_si256     (rt, ru);
  1210  
  1211              /* exponent and sign position */
  1212              uint32_t md = _mm256_movemask_epi8(id);
  1213              uint32_t me = _mm256_movemask_epi8(ie);
  1214              uint32_t ms = _mm256_movemask_epi8(is);
  1215              uint32_t mr = _mm256_movemask_epi8(rv);
  1216  
  1217              /* mismatch position */
  1218              uint32_t v;
  1219              uint32_t i = __builtin_ctzll(~(uint64_t)mr | 0x0100000000);
  1220  
  1221              /* mask out excess characters */
  1222              if (i != 32) {
  1223                  md &= (1 << i) - 1;
  1224                  me &= (1 << i) - 1;
  1225                  ms &= (1 << i) - 1;
  1226              }
  1227  
  1228              /* check & update decimal point, exponent and sign index */
  1229              check_bits(md)
  1230              check_bits(me)
  1231              check_bits(ms)
  1232              check_vidx(di, md)
  1233              check_vidx(ei, me)
  1234              check_vidx(si, ms)
  1235  
  1236              /* check for valid number */
  1237              if (i != 32) {
  1238                  sp += i;
  1239                  _mm256_zeroupper();
  1240                  goto check_index;
  1241              }
  1242  
  1243              /* move to next block */
  1244              sp += 32;
  1245              nb -= 32;
  1246          } while (nb >= 32);
  1247  
  1248          /* clear the upper half to prevent AVX-SSE transition penalty */
  1249          _mm256_zeroupper();
  1250      }
  1251  #endif
  1252  
  1253      /* can do with SSE */
  1254      if (likely(nb >= 16)) {
  1255          __m128i dc = _mm_set1_epi8(':');
  1256          __m128i ds = _mm_set1_epi8('/');
  1257          __m128i dp = _mm_set1_epi8('.');
  1258          __m128i el = _mm_set1_epi8('e');
  1259          __m128i eu = _mm_set1_epi8('E');
  1260          __m128i xp = _mm_set1_epi8('+');
  1261          __m128i xm = _mm_set1_epi8('-');
  1262  
  1263          /* 16-byte loop */
  1264          do {
  1265              __m128i sb = _mm_loadu_si128 ((const void *)sp);
  1266              __m128i i0 = _mm_cmpgt_epi8  (sb, ds);
  1267              __m128i i9 = _mm_cmplt_epi8  (sb, dc);
  1268              __m128i id = _mm_cmpeq_epi8  (sb, dp);
  1269              __m128i il = _mm_cmpeq_epi8  (sb, el);
  1270              __m128i iu = _mm_cmpeq_epi8  (sb, eu);
  1271              __m128i ip = _mm_cmpeq_epi8  (sb, xp);
  1272              __m128i im = _mm_cmpeq_epi8  (sb, xm);
  1273              __m128i iv = _mm_and_si128   (i9, i0);
  1274              __m128i ie = _mm_or_si128    (il, iu);
  1275              __m128i is = _mm_or_si128    (ip, im);
  1276              __m128i rt = _mm_or_si128    (iv, id);
  1277              __m128i ru = _mm_or_si128    (ie, is);
  1278              __m128i rv = _mm_or_si128    (rt, ru);
  1279  
  1280              /* exponent and sign position */
  1281              uint32_t md = _mm_movemask_epi8(id);
  1282              uint32_t me = _mm_movemask_epi8(ie);
  1283              uint32_t ms = _mm_movemask_epi8(is);
  1284              uint32_t mr = _mm_movemask_epi8(rv);
  1285  
  1286              /* mismatch position */
  1287              uint32_t v;
  1288              uint32_t i = __builtin_ctzll(~mr | 0x00010000);
  1289  
  1290              /* mask out excess characters */
  1291              if (i != 16) {
  1292                  md &= (1 << i) - 1;
  1293                  me &= (1 << i) - 1;
  1294                  ms &= (1 << i) - 1;
  1295              }
  1296  
  1297              /* check & update exponent and sign index */
  1298              check_bits(md)
  1299              check_bits(me)
  1300              check_bits(ms)
  1301              check_vidx(di, md)
  1302              check_vidx(ei, me)
  1303              check_vidx(si, ms)
  1304  
  1305              /* check for valid number */
  1306              if (i != 16) {
  1307                  sp += i;
  1308                  goto check_index;
  1309              }
  1310  
  1311              /* move to next block */
  1312              sp += 16;
  1313              nb -= 16;
  1314          } while (nb >= 16);
  1315      }
  1316  
  1317      /* remaining bytes, do with scalar code */
  1318      while (likely(nb-- > 0)) {
  1319          switch (*sp++) {
  1320              case '0' : /* fallthrough */
  1321              case '1' : /* fallthrough */
  1322              case '2' : /* fallthrough */
  1323              case '3' : /* fallthrough */
  1324              case '4' : /* fallthrough */
  1325              case '5' : /* fallthrough */
  1326              case '6' : /* fallthrough */
  1327              case '7' : /* fallthrough */
  1328              case '8' : /* fallthrough */
  1329              case '9' : break;
  1330              case '.' : check_sidx(di); break;
  1331              case 'e' : /* fallthrough */
  1332              case 'E' : check_sidx(ei); break;
  1333              case '+' : /* fallthrough */
  1334              case '-' : check_sidx(si); break;
  1335              default  : sp--; goto check_index;
  1336          }
  1337      }
  1338  check_index:
  1339      if (di == 0 || si == 0 || ei == 0) {
  1340          return -1;
  1341      } else if (di == sp - ss - 1|| si == sp - ss - 1 || ei == sp - ss - 1) {
  1342          return -(sp - ss);
  1343      } else if (si > 0 && ei != si - 1) {
  1344          return -si - 1;
  1345      } else if (di >= 0 && ei >= 0 && di > ei - 1) {
  1346          return -di - 1;
  1347      } else if (di >= 0 && ei >= 0 && di == ei - 1) {
  1348          return -ei - 1;
  1349      } else {
  1350          return sp - ss;
  1351      }
  1352  }
  1353  
  1354  #undef check_bits
  1355  #undef check_sidx
  1356  #undef check_vidx
  1357  
  1358  long skip_array(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
  1359      fsm_init(m, FSM_ARR_0);
  1360      return fsm_exec(m, src, p, flags);
  1361  }
  1362  
  1363  long skip_object(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
  1364      fsm_init(m, FSM_OBJ_0);
  1365      return fsm_exec(m, src, p, flags);
  1366  }
  1367  
  1368  long skip_string(const GoString *src, long *p, uint64_t flags) {
  1369      int64_t v = -1;
  1370      ssize_t q = *p - 1; // start position
  1371      ssize_t e = advance_string(src, *p, &v, flags);
  1372  
  1373      /* check for errors */
  1374      if (e < 0) {
  1375          *p = e == -ERR_EOF ? src->len : v;
  1376          return e;
  1377      }
  1378  
  1379      /* update the position */
  1380      *p = e;
  1381      return q;
  1382  }
  1383  
  1384  long skip_negative(const GoString *src, long *p) {
  1385      long i = *p;
  1386      long r = do_skip_number(src->buf + i, src->len - i);
  1387  
  1388      /* check for errors */
  1389      if (r < 0) {
  1390          *p -= r + 1;
  1391          return -ERR_INVAL;
  1392      }
  1393  
  1394      /* update value pointer */
  1395      *p += r;
  1396      return i - 1;
  1397  }
  1398  
  1399  long skip_positive(const GoString *src, long *p) {
  1400      long i = *p - 1;
  1401      long r = do_skip_number(src->buf + i, src->len - i);
  1402  
  1403      /* check for errors */
  1404      if (r < 0) {
  1405          *p -= r + 2;
  1406          return -ERR_INVAL;
  1407      }
  1408  
  1409      /* update value pointer */
  1410      *p += r - 1;
  1411      return i;
  1412  }
  1413  
  1414  long skip_number(const GoString *src, long *p) {
  1415      const char* ss = src->buf;
  1416      const char* sp = src->buf + *p;
  1417      size_t nb = src->len - *p;
  1418      long i = *p;
  1419      long r;
  1420      bool neg = *sp == '-';
  1421  
  1422      sp += neg;
  1423      nb -= neg;
  1424      if (unlikely(nb <= 0)) {
  1425          *p = sp - ss;
  1426          return -ERR_EOF;
  1427      }
  1428  
  1429      if (unlikely(nb > 0 && (*sp > '9' || *sp < '0'))) {
  1430          *p = sp - ss;
  1431          return -ERR_INVAL;
  1432      }
  1433  
  1434      r = do_skip_number(sp, nb);
  1435      if (unlikely(r < 0)) {
  1436          *p = sp - (r + 1) - ss;
  1437          return -ERR_INVAL;
  1438      }
  1439      *p = sp + r - ss;
  1440      return i;
  1441  }
  1442  
  1443  long skip_one(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
  1444      fsm_init(m, FSM_VAL);
  1445      return fsm_exec(m, src, p, flags);
  1446  }
  1447  
  1448  long validate_one(const GoString *src, long *p, StateMachine *m) {
  1449      fsm_init(m, FSM_VAL);
  1450      return fsm_exec(m, src, p, MASK_VALIDATE_STRING);
  1451  }
  1452  
  1453  /* Faster skip api for sonic.ast */
  1454  
  1455  static always_inline uint64_t get_maskx64(const char *s, char c) {
  1456  #if USE_AVX2
  1457      __m256i v0 = _mm256_loadu_si256((__m256i const *)s);
  1458      __m256i v1 = _mm256_loadu_si256((__m256i const *)(s + 32));
  1459      uint32_t m0 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v0, _mm256_set1_epi8(c)));
  1460      uint32_t m1 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, _mm256_set1_epi8(c))); 
  1461      return ((uint64_t)(m1) << 32) | (uint64_t)(m0);
  1462  #else
  1463      __m128i v0 = _mm_loadu_si128((__m128i const*)s);
  1464      __m128i v1 = _mm_loadu_si128((__m128i const*)(s + 16));
  1465      __m128i v2 = _mm_loadu_si128((__m128i const*)(s + 32));
  1466      __m128i v3 = _mm_loadu_si128((__m128i const*)(s + 48));
  1467      uint32_t m0 = _mm_movemask_epi8(_mm_cmpeq_epi8(v0, _mm_set1_epi8(c)));
  1468      uint32_t m1 = _mm_movemask_epi8(_mm_cmpeq_epi8(v1, _mm_set1_epi8(c)));
  1469      uint32_t m2 = _mm_movemask_epi8(_mm_cmpeq_epi8(v2, _mm_set1_epi8(c)));
  1470      uint32_t m3 = _mm_movemask_epi8(_mm_cmpeq_epi8(v3, _mm_set1_epi8(c)));
  1471      return ((uint64_t)(m3) << 48) | ((uint64_t)(m2)  << 32) | ((uint64_t)(m1) << 16) | (uint64_t)(m0);
  1472  #endif
  1473  }
  1474  
  1475  static always_inline uint64_t get_maskx32(const char *s, char c) {
  1476  #if USE_AVX2
  1477      __m256i v0 = _mm256_loadu_si256((__m256i const *)s);
  1478      uint64_t m0 = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v0, _mm256_set1_epi8(c)));
  1479      return m0;
  1480  #else
  1481      __m128i v0 = _mm_loadu_si128((__m128i const*)s);
  1482      __m128i v1 = _mm_loadu_si128((__m128i const*)(s + 16));
  1483      uint64_t m0 = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(v0, _mm_set1_epi8(c)));
  1484      uint64_t m1 = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, _mm_set1_epi8(c)));
  1485      return m0 | (m1 << 16);
  1486  #endif
  1487  }
  1488  
  1489  // get the string (besides in quote) mask
  1490  static always_inline uint64_t get_string_maskx64(const char *s, uint64_t *prev_inquote, uint64_t *prev_bs) {
  1491      uint64_t escaped = *prev_bs;
  1492      uint64_t quote_mask = 0, bs_mask = 0;
  1493  
  1494      /* read and get the quote or backslash bitmask */
  1495      quote_mask = get_maskx64(s, '"');
  1496      bs_mask = get_maskx64(s, '\\');
  1497  
  1498      /* get the escaped bitmask */
  1499      if (bs_mask || *prev_bs) {
  1500          bs_mask &= ~(*prev_bs);
  1501          uint64_t follow_bs = (bs_mask << 1) | *prev_bs;
  1502          uint64_t bs_start = bs_mask & ~follow_bs;
  1503          uint64_t odd_start = bs_start & ODD_MASK;
  1504          uint64_t even_or_oc = add64(odd_start, bs_mask, prev_bs);
  1505          uint64_t even_or_escaped = (even_or_oc << 1) ^ EVEN_MASK;
  1506          escaped = follow_bs & even_or_escaped;
  1507      } else {
  1508          *prev_bs = 0;
  1509      }
  1510      quote_mask &= ~escaped;
  1511  
  1512      /* get the inquote bitmask */
  1513      uint64_t inquote = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0, quote_mask), _mm_set1_epi8('\xFF'), 0));
  1514      inquote ^= *prev_inquote;
  1515      *prev_inquote = (uint64_t)(((int64_t)(inquote)) >> 63);
  1516      return inquote;
  1517  }
  1518  
  1519  // get the next json structural, '}', ']' or ','。
  1520  #if USE_AVX2
  1521  static always_inline int get_structural_maskx32(const char *s) {
  1522      __m256i v = _mm256_loadu_si256((const void *)s);
  1523      __m256i e1 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('}'));
  1524      __m256i e2 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8(']'));
  1525      __m256i e3 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8(','));
  1526      __m256i sv = _mm256_or_si256(_mm256_or_si256(e1, e2), e3);
  1527      return _mm256_movemask_epi8(sv);
  1528  }
  1529  #endif
  1530  
  1531  static always_inline int get_structural_maskx16(const char *s) {
  1532      __m128i v = _mm_loadu_si128((const void *)s);
  1533      __m128i e1 = _mm_cmpeq_epi8(v, _mm_set1_epi8('}'));
  1534      __m128i e2 = _mm_cmpeq_epi8(v, _mm_set1_epi8(']'));
  1535      __m128i e3 = _mm_cmpeq_epi8(v, _mm_set1_epi8(','));
  1536      __m128i sv = _mm_or_si128(_mm_or_si128(e1, e2), e3);
  1537      return _mm_movemask_epi8(sv);
  1538  }
  1539  
  1540  // skip the number at the next '}', ']' or ',' or the ending of json.
  1541  static always_inline long skip_number_fast(const GoString *src, long *p) {
  1542      size_t nb = src->len - *p;
  1543      const char *s = src->buf + *p;
  1544      long vi = *p - 1;
  1545      int m = 0;
  1546  
  1547  #if USE_AVX2
  1548      while (likely(nb >= 32)) {
  1549          if ((m = get_structural_maskx32(s))) {
  1550              *p = s - src->buf + __builtin_ctzll(m);
  1551              return vi;
  1552          }
  1553          s += 32, nb -= 32;
  1554      }
  1555  #endif
  1556  
  1557      while (likely(nb >= 16)) {
  1558          if ((m = get_structural_maskx16(s))) {
  1559              *p = s - src->buf + __builtin_ctzll(m);
  1560              return vi;
  1561          }
  1562          s += 16, nb -= 16;
  1563      }
  1564  
  1565      while (likely(nb > 0)) {
  1566          if (*s == '}' || *s == ']' || *s == ',') {
  1567              *p = s - src->buf;
  1568              return vi;
  1569          }
  1570          s++, nb--;
  1571      }
  1572      *p = s - src->buf;
  1573      return vi;
  1574  }
  1575  
  1576  static always_inline long skip_container_fast(const GoString *src, long *p, char lc, char rc) {
  1577      long nb = src->len - *p;
  1578      const char *s = src->buf + *p;
  1579      long vi = *p - 1;
  1580  
  1581      uint64_t prev_inquote = 0, prev_bs = 0;
  1582      uint64_t lbrace = 0, rbrace = 0;
  1583      size_t lnum = 0, rnum = 0, last_lnum = 0;
  1584      uint64_t inquote = 0;
  1585  
  1586      while (likely(nb >= 64)) {
  1587  skip:
  1588          inquote = get_string_maskx64(s, &prev_inquote, &prev_bs);
  1589          lbrace = get_maskx64(s, lc) & ~inquote;
  1590          rbrace = get_maskx64(s, rc) & ~inquote;
  1591  
  1592          /* traverse each right brace */
  1593          last_lnum = lnum;
  1594          while (rbrace > 0) {
  1595              uint64_t lbrace_first = (rbrace - 1) & lbrace;
  1596              lnum = last_lnum + __builtin_popcountll((int64_t)lbrace_first);
  1597              bool is_closed = lnum <= rnum;
  1598              if (is_closed) {
  1599                  *p = src->len - nb + __builtin_ctzll(rbrace) + 1;
  1600                  // *p is out-of-bound access here
  1601                  if (*p > src->len) {
  1602                      *p = src->len;
  1603                      return -ERR_EOF;
  1604                  }
  1605                  return vi;
  1606              }
  1607              rbrace &= (rbrace - 1); // clear the lowest right brace
  1608              rnum ++;
  1609          }
  1610          lnum = last_lnum + __builtin_popcountll((int64_t)lbrace);
  1611          s += 64, nb -= 64;
  1612      }
  1613  
  1614      if (nb <= 0) {
  1615          *p = src->len;
  1616          return -ERR_EOF;
  1617      }
  1618  
  1619      char tbuf[64] = {0};
  1620      bool cross_page = vec_cross_page(s, 64);
  1621      if (cross_page) {
  1622          memcpy_p64(tbuf, s, nb);
  1623          s = tbuf;
  1624      }
  1625      goto skip;
  1626  }
  1627  
  1628  static always_inline long skip_object_fast(const GoString *src, long *p) {
  1629      return skip_container_fast(src, p, '{', '}');
  1630  }
  1631  
  1632  static always_inline long skip_array_fast(const GoString *src, long *p) {
  1633      return skip_container_fast(src, p, '[', ']');
  1634  }
  1635  
  1636  static always_inline long skip_string_fast(const GoString *src, long *p) {
  1637      const char* s = src->buf + *p;
  1638      long nb = src->len - *p;
  1639      long vi = *p - 1;
  1640      uint64_t prev_bs = 0, escaped;
  1641  
  1642      while (likely(nb >= 32)) {
  1643          uint32_t quote = get_maskx32(s, '"');
  1644          uint32_t bs_mask = get_maskx32(s, '\\');
  1645          if (bs_mask || prev_bs) {
  1646              bs_mask &= ~prev_bs;
  1647              uint64_t follow_bs = (bs_mask << 1) | prev_bs;
  1648              uint64_t bs_start = bs_mask & ~follow_bs;
  1649              uint64_t odd_start = bs_start & ODD_MASK;
  1650              uint64_t even_or_oc = add32(odd_start, bs_mask, &prev_bs);
  1651              uint64_t even_or_escaped = (even_or_oc << 1) ^ EVEN_MASK;
  1652              escaped = follow_bs & even_or_escaped;
  1653              quote &= ~escaped;
  1654          }
  1655          if (quote) {
  1656              *p = s + __builtin_ctzll(quote) + 1 - src->buf;
  1657              return vi;
  1658          }
  1659          s += 32;
  1660          nb -= 32;
  1661      }
  1662  
  1663      if (unlikely(prev_bs != 0)) {
  1664          if (nb == 0) return -ERR_EOF;
  1665          s++, nb--;
  1666      }
  1667  
  1668      while (likely(nb > 0)) {
  1669          if (*s == '\\') {
  1670              s += 2, nb -= 2;
  1671              continue;
  1672          }
  1673          if (*s == '"') {
  1674              *p = s - src->buf + 1;
  1675              return vi;
  1676          }
  1677          s++, nb--;
  1678      }
  1679      return -ERR_EOF;
  1680  }
  1681  
  1682  long skip_one_fast(const GoString *src, long *p) {
  1683      char c = advance_ns(src, p);
  1684      /* set the start address */
  1685      long vi = *p - 1;
  1686      switch (c) {
  1687          case '[': return skip_array_fast(src, p);
  1688          case '{': return skip_object_fast(src, p);
  1689          case '"': return skip_string_fast(src, p);
  1690          case '-': case '0' ... '9': return skip_number_fast(src, p);
  1691          case 't': case 'n': { if (*p + 3 <= src->len) { *p += 3; } else { return -ERR_EOF; } }; break;
  1692          case 'f': { if (*p + 4 <= src->len) { *p += 4; } else { return -ERR_EOF; } }; break;
  1693          case  0 : return -ERR_EOF;
  1694          default : *p -= 1; return -ERR_INVAL; // backward error position
  1695      }
  1696      return vi;
  1697  }
  1698  
  1699  
  1700  static always_inline GoKind kind(const GoIface* iface) {
  1701      return (iface->type->kind_flags) &  GO_KIND_MASK;
  1702  }
  1703  
  1704  static always_inline bool is_int(const GoIface* iface) {
  1705      return iface->type != NULL && kind(iface) == Int;
  1706  }
  1707  
  1708  static always_inline bool is_str(const GoIface* iface) {
  1709      return iface->type != NULL && kind(iface) == String;
  1710  }
  1711  
  1712  static always_inline GoString get_str(const GoIface* iface) {
  1713      return *(GoString*)(iface->value);
  1714  }
  1715  
  1716  static always_inline int64_t get_int(const GoIface* iface) {
  1717      return *(int64_t*)(iface->value);
  1718  }
  1719  
  1720  // xmemcmpeq return true if s1 and s2 is equal for the n bytes, otherwise, return false.
  1721  static always_inline bool xmemcmpeq(const char * s1, const char * s2, size_t n) {
  1722      bool c1, c2;
  1723  #if USE_AVX2
  1724      while (n >= 32) {
  1725          __m256i  v1   = _mm256_loadu_si256((const void *)s1);
  1726          __m256i  v2   = _mm256_loadu_si256((const void *)s2);
  1727          uint32_t mask = ~((uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)));
  1728          if (mask) return false;
  1729          s1 += 32;
  1730          s2 += 32;
  1731          n  -= 32;
  1732      };
  1733      c1 = vec_cross_page(s1, 32);
  1734      c2 = vec_cross_page(s2, 32);
  1735      // not cross page
  1736      if (!c1 && !c2) {
  1737          __m256i  v1   = _mm256_loadu_si256((const void *)s1);
  1738          __m256i  v2   = _mm256_loadu_si256((const void *)s2);
  1739          uint32_t mask = ~((uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)));
  1740          bool eq = (mask == 0) || (__builtin_ctzll(mask) >= n);
  1741          return eq;
  1742      }
  1743  #endif
  1744      while (n >= 16) {
  1745          __m128i  v1   = _mm_loadu_si128((const void *)s1);
  1746          __m128i  v2   = _mm_loadu_si128((const void *)s2);
  1747          uint16_t mask = ~((uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)));
  1748          if (mask != 0) return false;
  1749          s1 += 16;
  1750          s2 += 16;
  1751          n  -= 16;
  1752      };
  1753      c1 = vec_cross_page(s1, 16);
  1754      c2 = vec_cross_page(s2, 16);
  1755      // not cross page
  1756      if (!c1 && !c2) {
  1757          __m128i  v1   = _mm_loadu_si128((const void *)s1);
  1758          __m128i  v2   = _mm_loadu_si128((const void *)s2);
  1759          uint16_t mask = ~((uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)));
  1760          bool eq = (mask == 0) || (__builtin_ctzll(mask) >= n);
  1761          return eq;
  1762      }
  1763      // cross page
  1764      while (n > 0 && *s1++ == *s2++) n--;
  1765      return n == 0;
  1766  }
  1767  
  1768  // match_key return negative if errors, zero if not matched, one if matched.
  1769  static always_inline long match_key(const GoString *src, long *p, const GoString key) {
  1770      static const long not_match = 0;
  1771      int64_t v = -1;
  1772      long si = *p;
  1773      long se = advance_string_default(src, *p, &v);
  1774      if (unlikely(se < 0)) {
  1775          *p = src->len;
  1776          return -ERR_EOF;
  1777      }
  1778  
  1779      /* update position */
  1780      *p = se;
  1781  
  1782      /* compare non-escaped strings */
  1783      if (likely(v == -1 || v > se)) {
  1784          long sn = se - si - 1;
  1785  
  1786          // check empty keys
  1787          if (!sn && !key.len) {
  1788              return true;
  1789          }
  1790          
  1791          return sn == key.len && xmemcmpeq(src->buf + si, key.buf, key.len);
  1792      }
  1793  
  1794      /* deal with escaped strings */
  1795      char buf[8] = {0}; // escaped buffer
  1796      const char* sp = src->buf + si;
  1797      const char* end = src->buf + se - 1;
  1798      const char* kp = key.buf;
  1799      const char* ke = key.buf + key.len;
  1800      while (sp < end && kp < ke) {
  1801          if (*sp == '\\') {
  1802              long en = unescape(&sp, end, buf);
  1803              if (en < 0) {
  1804                  *p = sp - src->buf;
  1805                  return en;
  1806              }
  1807              const char* ee = buf + en;
  1808              const char* ep = buf;
  1809              while (kp < ke && ep < ee && *kp == *ep) kp++, ep++;
  1810              if (ep != ee) {
  1811                  return not_match;
  1812              }
  1813          } else if (*sp == *kp) {
  1814              sp++, kp++; 
  1815          } else {
  1816              return not_match;
  1817          }
  1818      };
  1819      return sp == end && kp == ke;
  1820  }
  1821  
  1822  long get_by_path(const GoString *src, long *p, const GoSlice *path, StateMachine* sm) {
  1823      GoIface *ps = (GoIface*)(path->buf);
  1824      GoIface *pe = (GoIface*)(path->buf) + path->len;
  1825      char c = 0;
  1826      int64_t index;
  1827      long found;
  1828  
  1829  query:
  1830      /* to be safer for invalid json, use slower skip for the demanded fields */
  1831      if (ps == pe) {
  1832          return skip_one(src, p, sm, 0);
  1833      }
  1834  
  1835      /* match type: should query key in object, query index in array */
  1836      c = advance_ns(src, p);
  1837      if (is_str(ps)) {
  1838          if (c != '{') {
  1839              goto err_inval;
  1840          }
  1841          goto skip_in_obj;
  1842      } else if (is_int(ps)) {
  1843          if (c != '[') {
  1844              goto err_inval;
  1845          }
  1846  
  1847          index = get_int(ps);
  1848          if (index < 0) {
  1849              goto err_path;
  1850          }
  1851  
  1852          goto skip_in_arr;
  1853      } else {
  1854          goto err_path;
  1855      }
  1856  
  1857  skip_in_obj:
  1858      c = advance_ns(src, p);
  1859      if (c == '}') {
  1860          goto not_found;
  1861      }
  1862      if (c != '"') {
  1863          goto err_inval;
  1864      }
  1865  
  1866      /* parse the object key */
  1867      found = match_key(src, p, get_str(ps));
  1868      if (found < 0) {
  1869          return found; // parse string errors
  1870      }
  1871  
  1872      /* value should after : */
  1873      c = advance_ns(src, p);
  1874      if (c != ':') {
  1875          goto err_inval;
  1876      }
  1877      if (found) {
  1878          ps++;
  1879          goto query;
  1880      }
  1881  
  1882      /* skip the unknown fields */
  1883      skip_one_fast(src, p);
  1884      c = advance_ns(src, p);
  1885      if (c == '}') {
  1886          goto not_found;
  1887      }
  1888      if (c != ',') {
  1889          goto err_inval;
  1890      }
  1891      goto skip_in_obj;
  1892  
  1893  skip_in_arr:
  1894      /* check empty array */
  1895      c = advance_ns(src, p);
  1896      if (c == ']') {
  1897          goto not_found;
  1898      }
  1899      *p -= 1;
  1900  
  1901      /* skip array elem one by one */
  1902      while (index-- > 0) {
  1903          skip_one_fast(src, p);
  1904          c = advance_ns(src, p);
  1905          if (c == ']') {
  1906              goto not_found;
  1907          }
  1908          if (c != ',') {
  1909              goto err_inval;
  1910          }
  1911      }
  1912      ps++;
  1913      goto query;
  1914  
  1915  not_found:
  1916      *p -= 1; // backward error position
  1917      return -ERR_NOT_FOUND;
  1918  err_inval:
  1919      *p -= 1;
  1920      return -ERR_INVAL;
  1921  err_path:
  1922      *p -= 1;
  1923      return -ERR_UNSUPPORT_TYPE;
  1924  }
  1925  
  1926  // 
  1927  long validate_utf8(const GoString *src, long *p, StateMachine *m) {
  1928      xassert(*p >= 0 && src->len > *p);
  1929      return validate_utf8_with_errors(src->buf, src->len, p, m);
  1930  }
  1931  
  1932  // validate_utf8_fast returns zero if valid, otherwise, the error position.
  1933  long validate_utf8_fast(const GoString *s) {
  1934  #if USE_AVX2
  1935      /* fast path for valid utf8 */
  1936      if (validate_utf8_avx2(s) == 0) {
  1937          return 0;
  1938      }
  1939  #endif
  1940      return validate_utf8_errors(s);
  1941  }