github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/scanning.h (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #pragma once
    18  
    19  #include "native.h"
    20  #include "utf8.h"
    21  #include "utils.h"
    22  #include "parsing.h"
    23  #include "lspace.h"
    24  #include "atof_native.h"
    25  #include "atof_eisel_lemire.h"
    26  
/* Forward declarations for helpers that this header references before their
 * definitions. vnumber_1 is defined further down in this file; the other four
 * are presumably defined later as well -- TODO confirm. */
static always_inline long skip_number_1(const GoString *src, long *p);
static always_inline void vnumber_1(const GoString *src, long *p, JsonState *ret);
static always_inline long skip_string_1(const GoString *src, long *p, uint64_t flags);
static always_inline long skip_positive_1(const GoString *src, long *p);
static always_inline long skip_negative_1(const GoString *src, long *p);
    32  
    33  static const uint64_t ODD_MASK  = 0xaaaaaaaaaaaaaaaa;
    34  static const uint64_t EVEN_MASK = 0x5555555555555555;
    35  
    36  // NOTE: mask referenced from decoder/decoder.go
    37  static const uint64_t MASK_VALIDATE_STRING = 1ull << 5;
    38  static const uint64_t MASK_ALLOW_CONTROL   = 1ull << 31;
    39  
    40  static const double P10_TAB[23] = {
    41      /* <= the connvertion to double is not exact when less than 1 => */     1e-000,
    42      1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010,
    43      1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020,
    44      1e+021, 1e+022 /* <= the connvertion to double is not exact when larger,  => */
    45  };
    46  
    47  static always_inline uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo) {
    48      uint32_t v;
    49      uint32_t c = __builtin_uadd_overflow((uint32_t)v1, (uint32_t)v2, &v);
    50  
    51      /* set the carry */
    52      *vo = c;
    53      return v;
    54  }
    55  
    56  static always_inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo) {
    57      unsigned long long v;
    58      uint64_t c = __builtin_uaddll_overflow(v1, v2, &v);
    59  
    60      /* set the carry */
    61      *vo = c;
    62      return v;
    63  }
    64  
    65  static always_inline char isspace(char ch) {
    66      return ch == ' ' || ch == '\r' || ch == '\n' | ch == '\t';
    67  }
    68  
    69  const int MASK_USE_NUMBER = 1<<1;
    70  
    71  static always_inline void vdigits(const GoString *src, long *p, JsonState *ret, uint64_t flag) {
    72      --*p;
    73      if (flag & MASK_USE_NUMBER) {
    74          long i = skip_number_1(src, p);
    75          if (i < 0) {
    76              ret->vt = i;
    77              return;
    78          }
    79          ret->vt = V_DOUBLE;
    80          ret->ep = i;
    81          return;
    82      }
    83      vnumber_1(src, p, ret);
    84  }
    85  
    86  static always_inline char advance_ns(const GoString *src, long *p) {
    87      size_t       vi = *p;
    88      size_t       nb = src->len;
    89      const char * sp = src->buf;
    90  
    91      /* it's likely to run into non-spaces within a few
    92       * characters, so test up to 4 characters manually */
    93      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    94      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    95      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    96      if (vi < nb && !isspace(sp[vi])) goto nospace; else vi++;
    97  
    98      /* check EOF */
    99      if (vi >= nb) {
   100          *p = vi;
   101          return 0;
   102      }
   103  
   104      /* too many spaces, use SIMD to search for characters */
   105      if ((vi = lspace_1(sp, nb, vi)) >= nb) {
   106          return 0;
   107      }
   108  
   109  nospace:
   110      *p = vi + 1;
   111      return src->buf[vi];
   112  }
   113  
   114  static always_inline int64_t advance_dword(const GoString *src, long *p, long dec, int64_t ret, uint32_t val) {
   115      if (*p > src->len + dec - 4) {
   116          *p = src->len;
   117          return -ERR_EOF;
   118      } else if (*(uint32_t *)(src->buf + *p - dec) == val) {
   119          *p += 4 - dec;
   120          return ret;
   121      } else {
   122          *p -= dec;
   123          for (int i = 0; src->buf[*p] == (val & 0xff) && i < 4; i++, ++*p) { val >>= 8; }
   124          return -ERR_INVAL;
   125      }
   126  }
   127  
   128  static always_inline ssize_t advance_string_default(const GoString *src, long p, int64_t *ep) {
   129      char     ch;
   130      uint64_t es;
   131      uint64_t fe;
   132      uint64_t os;
   133      uint64_t m0;
   134      uint64_t m1;
   135      uint64_t cr = 0;
   136  
   137      /* prevent out-of-bounds accessing */
   138      if (unlikely(src->len == p)) {
   139          return -ERR_EOF;
   140      }
   141  
   142      /* buffer pointers */
   143      size_t       nb = src->len;
   144      const char * sp = src->buf;
   145      const char * ss = src->buf;
   146  
   147  #define ep_init()   *ep = -1;
   148  #define ep_setc()   ep_setx(sp - ss - 1)
   149  #define ep_setx(x)  if (*ep == -1) { *ep = (x); }
   150  
   151      /* seek to `p` */
   152      nb -= p;
   153      sp += p;
   154      ep_init()
   155  
   156  #if USE_AVX2
   157      /* initialize vectors */
   158      __m256i v0;
   159      __m256i v1;
   160      __m256i q0;
   161      __m256i q1;
   162      __m256i x0;
   163      __m256i x1;
   164      __m256i cq = _mm256_set1_epi8('"');
   165      __m256i cx = _mm256_set1_epi8('\\');
   166  
   167      /* partial masks */
   168      uint32_t s0;
   169      uint32_t s1;
   170      uint32_t t0;
   171      uint32_t t1;
   172  #else
   173      /* initialize vectors */
   174      __m128i v0;
   175      __m128i v1;
   176      __m128i v2;
   177      __m128i v3;
   178      __m128i q0;
   179      __m128i q1;
   180      __m128i q2;
   181      __m128i q3;
   182      __m128i x0;
   183      __m128i x1;
   184      __m128i x2;
   185      __m128i x3;
   186      __m128i cq = _mm_set1_epi8('"');
   187      __m128i cx = _mm_set1_epi8('\\');
   188  
   189      /* partial masks */
   190      uint32_t s0;
   191      uint32_t s1;
   192      uint32_t s2;
   193      uint32_t s3;
   194      uint32_t t0;
   195      uint32_t t1;
   196      uint32_t t2;
   197      uint32_t t3;
   198  #endif
   199  
   200  #define m0_mask(add)                \
   201      m1 &= ~cr;                      \
   202      fe  = (m1 << 1) | cr;           \
   203      os  = (m1 & ~fe) & ODD_MASK;    \
   204      es  = add(os, m1, &cr) << 1;    \
   205      m0 &= ~(fe & (es ^ EVEN_MASK));
   206  
   207      /* 64-byte SIMD loop */
   208      while (likely(nb >= 64)) {
   209  #if USE_AVX2
   210          v0 = _mm256_loadu_si256   ((const void *)(sp +  0));
   211          v1 = _mm256_loadu_si256   ((const void *)(sp + 32));
   212          q0 = _mm256_cmpeq_epi8    (v0, cq);
   213          q1 = _mm256_cmpeq_epi8    (v1, cq);
   214          x0 = _mm256_cmpeq_epi8    (v0, cx);
   215          x1 = _mm256_cmpeq_epi8    (v1, cx);
   216          s0 = _mm256_movemask_epi8 (q0);
   217          s1 = _mm256_movemask_epi8 (q1);
   218          t0 = _mm256_movemask_epi8 (x0);
   219          t1 = _mm256_movemask_epi8 (x1);
   220          m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
   221          m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
   222  #else
   223          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   224          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   225          v2 = _mm_loadu_si128   ((const void *)(sp + 32));
   226          v3 = _mm_loadu_si128   ((const void *)(sp + 48));
   227          q0 = _mm_cmpeq_epi8    (v0, cq);
   228          q1 = _mm_cmpeq_epi8    (v1, cq);
   229          q2 = _mm_cmpeq_epi8    (v2, cq);
   230          q3 = _mm_cmpeq_epi8    (v3, cq);
   231          x0 = _mm_cmpeq_epi8    (v0, cx);
   232          x1 = _mm_cmpeq_epi8    (v1, cx);
   233          x2 = _mm_cmpeq_epi8    (v2, cx);
   234          x3 = _mm_cmpeq_epi8    (v3, cx);
   235          s0 = _mm_movemask_epi8 (q0);
   236          s1 = _mm_movemask_epi8 (q1);
   237          s2 = _mm_movemask_epi8 (q2);
   238          s3 = _mm_movemask_epi8 (q3);
   239          t0 = _mm_movemask_epi8 (x0);
   240          t1 = _mm_movemask_epi8 (x1);
   241          t2 = _mm_movemask_epi8 (x2);
   242          t3 = _mm_movemask_epi8 (x3);
   243          m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
   244          m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
   245  #endif
   246          /** update first quote position */
   247          if (unlikely(m1 != 0)) {
   248              ep_setx(sp - ss + __builtin_ctzll(m1))
   249          }
   250  
   251          /** mask all the escaped quotes */
   252          if (unlikely(m1 != 0 || cr != 0)) {
   253              m0_mask(add64)
   254          }
   255         
   256          /* check for end quote */
   257          if (m0 != 0) {
   258              return sp - ss + __builtin_ctzll(m0) + 1;
   259          }
   260  
   261          /* move to the next block */
   262          sp += 64;
   263          nb -= 64;
   264      }
   265  
   266      /* 32-byte SIMD round */
   267      if (likely(nb >= 32)) {
   268  #if USE_AVX2
   269          v0 = _mm256_loadu_si256   ((const void *)sp);
   270          q0 = _mm256_cmpeq_epi8    (v0, cq);
   271          x0 = _mm256_cmpeq_epi8    (v0, cx);
   272          s0 = _mm256_movemask_epi8 (q0);
   273          t0 = _mm256_movemask_epi8 (x0);
   274          m0 = (uint64_t)s0;
   275          m1 = (uint64_t)t0;
   276  #else
   277          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   278          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   279          q0 = _mm_cmpeq_epi8    (v0, cq);
   280          q1 = _mm_cmpeq_epi8    (v1, cq);
   281          x0 = _mm_cmpeq_epi8    (v0, cx);
   282          x1 = _mm_cmpeq_epi8    (v1, cx);
   283          s0 = _mm_movemask_epi8 (q0);
   284          s1 = _mm_movemask_epi8 (q1);
   285          t0 = _mm_movemask_epi8 (x0);
   286          t1 = _mm_movemask_epi8 (x1);
   287          m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
   288          m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
   289  #endif
   290         
   291          /** update first quote position */
   292          if (unlikely(m1 != 0)) {
   293              ep_setx(sp - ss + __builtin_ctzll(m1))
   294          }
   295  
   296          /** mask all the escaped quotes */
   297          if (unlikely(m1 != 0 || cr != 0)) {
   298              m0_mask(add32)
   299          }
   300         
   301          /* check for end quote */
   302          if (m0 != 0) {
   303              return sp - ss + __builtin_ctzll(m0) + 1;
   304          }
   305  
   306          /* move to the next block */
   307          sp += 32;
   308          nb -= 32;
   309      }
   310  
   311      /* check for carry */
   312      if (unlikely(cr != 0)) {
   313          if (nb == 0) {
   314              return -ERR_EOF;
   315          } else {
   316              ep_setc()
   317              sp++, nb--;
   318          }
   319      }
   320  
   321      /* handle the remaining bytes with scalar code */
   322      while (nb-- > 0 && (ch = *sp++) != '"') {
   323          if (unlikely(ch == '\\')) {
   324              if (nb == 0) {
   325                  return -ERR_EOF;
   326              } else {
   327                  ep_setc()
   328                  sp++, nb--;
   329              }
   330          }
   331      }
   332  
   333  #undef ep_init
   334  #undef ep_setc
   335  #undef ep_setx
   336  #undef m0_mask
   337  
   338      /* check for quotes */
   339      if (ch == '"') {
   340          return sp - ss;
   341      } else {
   342          return -ERR_EOF;
   343      }
   344  }
   345  
   346  #if USE_AVX2
   347  
   348  static always_inline int _mm256_get_mask(__m256i v, __m256i t) {
   349      return _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, t));
   350  }
   351  
   352  // contrl char: 0x00 ~ 0x1F
   353  static always_inline int _mm256_cchars_mask(__m256i v) {
   354      __m256i e1 = _mm256_cmpgt_epi8 (v, _mm256_set1_epi8(-1));
   355      __m256i e2 = _mm256_cmpgt_epi8 (v, _mm256_set1_epi8(31));
   356      return    _mm256_movemask_epi8 (_mm256_andnot_si256 (e2, e1));
   357  }
   358  
   359  // ascii: 0x00 ~ 0x7F
   360  static always_inline int _mm256_nonascii_mask(__m256i v) {
   361      return _mm256_movemask_epi8(v);
   362  }
   363  
   364  #endif
   365  
   366  static always_inline int _mm_get_mask(__m128i v, __m128i t) {
   367      return _mm_movemask_epi8(_mm_cmpeq_epi8(v, t));
   368  }
   369  
   370  // contrl char: 0x00 ~ 0x1F
   371  static always_inline int _mm_cchars_mask(__m128i v) {
   372      __m128i e1 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(-1));
   373      __m128i e2 = _mm_cmpgt_epi8 (v, _mm_set1_epi8(31));
   374      return    _mm_movemask_epi8 (_mm_andnot_si128 (e2, e1));
   375  }
   376  
   377  // ascii: 0x00 ~ 0x7F
   378  static always_inline int _mm_nonascii_mask(__m128i v) {
   379      return _mm_movemask_epi8(v);
   380  }
   381  
   382  static always_inline ssize_t advance_string_validate(const GoString *src, long p, int64_t *ep) {
   383      char     ch;
   384      uint64_t m0, m1, m2;
   385      uint64_t es, fe, os;
   386      uint64_t cr = 0;
   387      long     qp = 0;
   388      long     np = 0;
   389  
   390      /* buffer pointers */
   391      size_t       nb = src->len;
   392      const char * sp = src->buf;
   393      const char * ss = src->buf;
   394  
   395      /* prevent out-of-bounds accessing */
   396      if (unlikely(nb == p)) {
   397          return -ERR_EOF;
   398      }
   399  
   400  #define ep_init()    *ep = -1;
   401  #define ep_setc()    ep_setx(sp - ss - 1)
   402  #define ep_setx(x)   if (*ep == -1) { *ep = (x); }
   403  #define ep_seterr(x)  *ep = (x);
   404  
   405      /* seek to `p` */
   406      nb -= p;
   407      sp += p;
   408      ep_init()
   409  
   410  #if USE_AVX2
   411      /* initialize vectors */
   412      __m256i v0;
   413      __m256i v1;
   414      __m256i cq = _mm256_set1_epi8('"');
   415      __m256i cx = _mm256_set1_epi8('\\');
   416  
   417      /* partial masks */
   418      uint32_t s0, s1;
   419      uint32_t t0, t1;
   420      uint32_t c0, c1;
   421  #else
   422      /* initialize vectors */
   423      __m128i v0;
   424      __m128i v1;
   425      __m128i v2;
   426      __m128i v3;
   427      __m128i cq = _mm_set1_epi8('"');
   428      __m128i cx = _mm_set1_epi8('\\');
   429  
   430      /* partial masks */
   431      uint32_t s0, s1, s2, s3;
   432      uint32_t t0, t1, t2, t3;
   433      uint32_t c0, c1, c2, c3;
   434  #endif
   435  
   436  #define m0_mask(add)                \
   437      m1 &= ~cr;                      \
   438      fe  = (m1 << 1) | cr;           \
   439      os  = (m1 & ~fe) & ODD_MASK;    \
   440      es  = add(os, m1, &cr) << 1;    \
   441      m0 &= ~(fe & (es ^ EVEN_MASK));
   442  
   443      /* 64-byte SIMD loop */
   444      while (likely(nb >= 64)) {
   445  #if USE_AVX2
   446          v0 = _mm256_loadu_si256   ((const void *)(sp +  0));
   447          v1 = _mm256_loadu_si256   ((const void *)(sp + 32));
   448          s0 = _mm256_get_mask(v0, cq);
   449          s1 = _mm256_get_mask(v1, cq);
   450          t0 = _mm256_get_mask(v0, cx);
   451          t1 = _mm256_get_mask(v1, cx);
   452          c0 = _mm256_cchars_mask(v0);
   453          c1 = _mm256_cchars_mask(v1);
   454          m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
   455          m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
   456          m2 = ((uint64_t)c1 << 32) | (uint64_t)c0;
   457  #else
   458          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   459          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   460          v2 = _mm_loadu_si128   ((const void *)(sp + 32));
   461          v3 = _mm_loadu_si128   ((const void *)(sp + 48));
   462          s0 = _mm_get_mask(v0, cq);
   463          s1 = _mm_get_mask(v1, cq);
   464          s2 = _mm_get_mask(v2, cq);
   465          s3 = _mm_get_mask(v3, cq);
   466          t0 = _mm_get_mask(v0, cx);
   467          t1 = _mm_get_mask(v1, cx);
   468          t2 = _mm_get_mask(v2, cx);
   469          t3 = _mm_get_mask(v3, cx);
   470          c0 = _mm_cchars_mask(v0);
   471          c1 = _mm_cchars_mask(v1);
   472          c2 = _mm_cchars_mask(v2);
   473          c3 = _mm_cchars_mask(v3);
   474          m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
   475          m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
   476          m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0;
   477  
   478  #endif
   479         
   480          /** update first quote position */
   481          if (unlikely(m1 != 0)) {
   482              ep_setx(sp - ss + __builtin_ctzll(m1))
   483          }
   484  
   485          /** mask all the escaped quotes */
   486          if (unlikely(m1 != 0 || cr != 0)) {
   487              m0_mask(add64)
   488          }
   489  
   490          qp = m0 ? __builtin_ctzll(m0) : 64;
   491          np = m2 ? __builtin_ctzll(m2) : 64;
   492         
   493          /* get the position of end quote */
   494          if (m0 != 0) {
   495              /* check control chars in JSON string */
   496              if (unlikely(np < qp)) {
   497                  ep_seterr(sp - ss + np)
   498                 
   499                  return -ERR_INVAL;
   500              }
   501              return sp - ss + qp + 1;
   502          }
   503  
   504          /* check control chars in JSON string */
   505          if (unlikely(m2 != 0)) {
   506              ep_setx(sp - ss + np)
   507             
   508              return -ERR_INVAL;
   509          }
   510  
   511          /* move to the next block */
   512          sp += 64;
   513          nb -= 64;
   514      }
   515  
   516      /* 32-byte SIMD round */
   517      if (likely(nb >= 32)) {
   518  #if USE_AVX2
   519          v0 = _mm256_loadu_si256   ((const void *)sp);
   520          s0 = _mm256_get_mask (v0, cq);
   521          t0 = _mm256_get_mask (v0, cx);
   522          c0 = _mm256_cchars_mask(v0);
   523          m0 = (uint64_t)s0;
   524          m1 = (uint64_t)t0;
   525          m2 = (uint64_t)c0;
   526  #else
   527          v0 = _mm_loadu_si128   ((const void *)(sp +  0));
   528          v1 = _mm_loadu_si128   ((const void *)(sp + 16));
   529          s0 = _mm_get_mask(v0, cq);
   530          s1 = _mm_get_mask(v1, cq);
   531          t0 = _mm_get_mask(v0, cx);
   532          t1 = _mm_get_mask(v1, cx);
   533          c0 = _mm_cchars_mask(v0);
   534          c1 = _mm_cchars_mask(v1);
   535          m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
   536          m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
   537          m2 = ((uint64_t)c1 << 16) | (uint64_t)c0;
   538  #endif
   539         
   540          /** update first quote position */
   541          if (unlikely(m1 != 0)) {
   542              ep_setx(sp - ss + __builtin_ctzll(m1))
   543          }
   544  
   545          /** mask all the escaped quotes */
   546          if (unlikely(m1 != 0 || cr != 0)) {
   547              m0_mask(add32)
   548          }
   549         
   550          qp = m0 ? __builtin_ctzll(m0) : 64;
   551          np = m2 ? __builtin_ctzll(m2) : 64;
   552         
   553          /* get the position of end quote */
   554          if (m0 != 0) {
   555              if (unlikely(np < qp)) {
   556                  ep_seterr(sp - ss + np)
   557                  return -ERR_INVAL;
   558              }
   559              return sp - ss + qp + 1;
   560          }
   561  
   562          /* check control chars in JSON string */
   563          if (unlikely(m2 != 0)) {
   564              ep_seterr(sp - ss + __builtin_ctzll(m2))
   565              return -ERR_INVAL;
   566          }
   567  
   568          /* move to the next block */
   569          sp += 32;
   570          nb -= 32;
   571      }
   572  
   573      /* check for carry */
   574      if (unlikely(cr != 0)) {
   575          if (nb == 0) {
   576              return -ERR_EOF;
   577          } else {
   578              ep_setc()
   579              sp++, nb--;
   580          }
   581      }
   582  
   583      /* handle the remaining bytes with scalar code */
   584      while (nb > 0) {
   585          ch = *sp;
   586          if (ch == '"') {
   587             
   588              return sp - ss + 1;
   589          }
   590  
   591          /* valid the escaped chars */
   592          if (unlikely(ch == '\\')) {
   593              if (nb == 1) {
   594                  return -ERR_EOF;
   595              }
   596              ep_setx(sp - ss)
   597              sp += 2, nb -= 2;
   598              continue;
   599          }
   600  
   601          /* valid unescaped chars */
   602          if (unlikely( ch >= 0 && ch <= 0x1f)) { // control chars
   603              ep_seterr(sp - ss)
   604              return -ERR_INVAL;
   605          }
   606  
   607          sp++, nb--;
   608      }
   609      return -ERR_EOF;
   610  #undef ep_init
   611  #undef ep_setc
   612  #undef ep_setx
   613  #undef ep_seterr
   614  #undef m0_mask
   615  }
   616  
   617  static always_inline ssize_t advance_string(const GoString *src, long p, int64_t *ep, uint64_t flags) {
   618      if ((flags & MASK_VALIDATE_STRING) != 0) {
   619          return advance_string_validate(src, p, ep);
   620      } else {
   621          return advance_string_default(src, p, ep);
   622      }
   623  }
   624  
/* The macros below are statement fragments (invoked without a trailing ';')
 * meant for use inside a void parsing function that has these names in scope:
 *   ret - result state, p - pointer to the cursor, s - input bytes,
 *   i - current index into s, n - input length. */

/* Overwrite only the value-type tag of the result. */
#define set_vt(t)   \
    ret->vt = t;

/* Reset the result: tag `t`, zeroed double and integer payloads, and the
 * current cursor `*p` stored in ret->ep. */
#define init_ret(t) \
    ret->vt = t;    \
    ret->dv = 0.0;  \
    ret->iv = 0;    \
    ret->ep = *p;

/* If the index has reached the end of input: move the cursor to `n`, flag
 * -ERR_EOF in the result and return from the enclosing function. */
#define check_eof()         \
    if (i >= n) {           \
        *p = n;             \
        ret->vt = -ERR_EOF; \
        return;             \
    }

/* Consume a leading '-', run the caller-supplied `on_neg` statement, and
 * make sure something follows the sign. */
#define check_sign(on_neg)  \
    if (s[i] == '-') {      \
        i++;                \
        on_neg;             \
        check_eof()         \
    }

/* Require a decimal digit at the current index; otherwise leave the cursor on
 * the offending byte, flag -ERR_INVAL and return from the enclosing function. */
#define check_digit()               \
    if (s[i] < '0' || s[i] > '9') { \
        *p = i;                     \
        ret->vt = -ERR_INVAL;       \
        return;                     \
    }
   654  
   655  #define check_leading_zero()                                                                    \
   656      if (s[i] == '0' && (i >= n || (s[i + 1] != '.' && s[i + 1] != 'e' && s[i + 1] != 'E'))) {   \
   657          *p = ++i;                                                                               \
   658          return;                                                                                 \
   659      }
   660  
/* Consume an optional '+' or '-' at the current index, storing 1 or -1 in
 * `sgn`, then make sure the input does not end right after the sign.
 * Expects `s`, `i`, `n`, `p` and `ret` in scope; `sgn` is left untouched when
 * no sign is present. */
#define parse_sign(sgn)                 \
    if (s[i] == '+' || s[i] == '-') {   \
        sgn = s[i++] == '+' ? 1 : -1;   \
        check_eof()                     \
    }
   666  
   667  #define is_digit(val) \
   668      '0' <= val && val <= '9'
   669  
/* Append an integer-part digit to the mantissa. While fewer than 19 digits
 * have been accumulated the digit is folded into `man`; beyond that the digit
 * is dropped and the decimal exponent is bumped instead. */
#define add_integer_to_mantissa(man, man_nd, exp10, dig) \
    if (man_nd < 19) {                                   \
        man = man * 10 + dig;                            \
        man_nd++;                                        \
    } else {                                             \
        exp10++;                                         \
    }

/* Append a fractional digit to the mantissa and lower the decimal exponent by
 * one. No digit-count limit is applied here; the caller must enforce it. */
#define add_float_to_mantissa(man, man_nd, exp10, dig) \
    man = man * 10 + dig;                              \
    man_nd++;                                          \
    exp10--;

/* Accumulate consecutive digits into `val` (scaled by `sgn` per digit). The
 * variadic part is pasted into the loop condition as an extra clause.
 * Expects `s`, `i` and `n` in scope. */
#define parse_float_digits(val, sgn, ...)                       \
    while (i < n && s[i] >= '0' && s[i] <= '9' __VA_ARGS__) {   \
        val *= 10;                                              \
        val += sgn * (s[i++] - '0');                            \
    }

/* Accumulate consecutive digits into `val` with overflow detection; on
 * overflow `ovf` is set to 1 and the loop stops.
 * NOTE(review): `i++` sits inside the second operand of add_digit_overflow's
 * `||`, so the index is not advanced when the multiplication already
 * overflowed, but is advanced when only the addition overflowed. */
#define parse_integer_digits(val, sgn, ovf)                     \
    while (i < n && s[i] >= '0' && s[i] <= '9') {               \
        if (add_digit_overflow(val, sgn * (s[i++] - '0'))) {    \
            ovf = 1;                                            \
            break;                                              \
        }                                                       \
    }

/* val = val * 10 + chr, evaluating to non-zero if either step overflowed.
 * `chr` is only evaluated when the multiplication did not overflow. */
#define add_digit_overflow(val, chr) (          \
    __builtin_mul_overflow(val, 10, &val) ||    \
    __builtin_add_overflow(val, chr, &val)      \
)
   701  
/* Function-body template for parsing an integer at `*p` into `ret`.
 *
 *   type   - accumulator type for the value;
 *   sgn    - factor applied to every digit as it is accumulated;
 *   on_neg - statement executed when a leading '-' is consumed.
 *
 * Expects `src`, `p` and `ret` in scope inside a void function. Outcomes:
 *   - success: ret->vt stays V_INTEGER, ret->iv holds the value and `*p` is
 *     left just past the digits;
 *   - overflow: ret->vt = -ERR_OVERFLOW and `*p = i - 1`;
 *   - a '.' or exponent marker after the digits: ret->vt = -ERR_NUMBER_FMT
 *     with `*p` on that character;
 *   - end of input / non-digit: reported by check_eof / check_digit. */
#define vinteger(type, sgn, on_neg)                     \
    int  ovf = 0;                                       \
    type val = 0;                                       \
                                                        \
    /* initial buffer pointers */                       \
    long         i = *p;                                \
    size_t       n = src->len;                          \
    const char * s = src->buf;                          \
                                                        \
    /* initialize the result, and check for '-' */      \
    init_ret(V_INTEGER)                                 \
    check_eof()                                         \
    check_sign(on_neg)                                  \
                                                        \
    /* check for leading zero or any digits */          \
    check_digit()                                       \
    check_leading_zero()                                \
    parse_integer_digits(val, sgn, ovf)                 \
                                                        \
    /* check for overflow */                            \
    if (ovf) {                                          \
        *p = i - 1;                                     \
        ret->vt = -ERR_OVERFLOW;                        \
        return;                                         \
    }                                                   \
                                                        \
    /* check for the decimal part */                    \
    if (i < n && s[i] == '.') {                         \
        *p = i;                                         \
        ret->vt = -ERR_NUMBER_FMT;                      \
        return;                                         \
    }                                                   \
                                                        \
    /* check for the exponent part */                   \
    if (i < n && (s[i] == 'e' || s[i] == 'E')) {        \
        *p = i;                                         \
        ret->vt = -ERR_NUMBER_FMT;                      \
        return;                                         \
    }                                                   \
                                                        \
    /* update the result */                             \
    *p = i;                                             \
    ret->iv = val;
   745  
   746  /** check whether float can represent the val exactly **/
   747  static always_inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val) {
   748      *val = (double)man;
   749  
   750      if (man >> 52 != 0) {
   751          return false;
   752      }
   753  
   754      /* equal to if (sgn == -1) { *val *= -1; } */
   755      *(uint64_t *)val |= ((uint64_t)(sgn) >> 63 << 63);
   756  
   757      if (exp == 0 || man == 0) {
   758          return true;
   759      } else if (exp > 0 && exp <= 15+22) {
   760          /* uint64 integers: accurate range <= 10^15          *
   761           * Powers of 10: accurate range <= 10^22, as P10_TAB *
   762           * Example: man 1, exp 36, is ok                     */
   763          if (exp > 22) {
   764              *val *= P10_TAB[exp-22];
   765              exp = 22;
   766          }
   767  
   768          /* f is not accurate when too larger */
   769          if (*val > 1e15 || *val < -1e15) {
   770              return false;
   771          }
   772  
   773          *val *= P10_TAB[exp];
   774          return true;
   775      } else if (exp < 0 && exp >= -22) {
   776          *val /=  P10_TAB[-exp];
   777          return true;
   778      }
   779  
   780      return false;
   781  }
   782  
   783  static always_inline double atof_fast(uint64_t man, int exp, int sgn, int trunc, double *val) {
   784      double val_up = 0.0;
   785  
   786      /* look-up for fast atof if the conversion can be exactly */
   787      if (is_atof_exact(man, exp, sgn, val)) {
   788          return true;
   789      }
   790  
   791      /* A fast atof algorithm for high percison */
   792      if (atof_eisel_lemire64_1(man, exp, sgn, val)) {
   793          if (!trunc || (atof_eisel_lemire64_1(man+1, exp, sgn, &val_up) && val_up == *val)) {
   794              return true;
   795          }
   796      }
   797  
   798      return false;
   799  }
   800  
   801  static bool always_inline is_overflow(uint64_t man, int sgn, int exp10) {
   802      /* the former exp10 != 0 means man has overflowed
   803       * the later euqals to man*sgn < INT64_MIN or > INT64_MAX */
   804      return exp10 != 0 ||
   805          ((man >> 63) == 1 && ((uint64_t)sgn & man) != (1ull << 63));
   806  }
   807  
   808  static always_inline void vnumber_1(const GoString *src, long *p, JsonState *ret) {
   809      int      sgn = 1;
   810      uint64_t man = 0; // mantissa for double (float64)
   811      int   man_nd = 0; // # digits of mantissa, 10 ^ 19 fits uint64_t
   812      int    exp10 = 0; // val = sgn * man * 10 ^ exp10
   813      int    trunc = 0;
   814      double   val = 0;
   815  
   816      /* initial buffer pointers */
   817      long         i = *p;
   818      size_t       n = src->len;
   819      const char * s = src->buf;
   820      char     *dbuf = ret->dbuf;
   821      ssize_t   dcap = ret->dcap;
   822  
   823      /* initialize the result, and check for EOF */
   824      init_ret(V_INTEGER)
   825      check_eof()
   826      check_sign(sgn = -1)
   827  
   828      /* check for leading zero */
   829      check_digit()
   830      check_leading_zero()
   831  
   832      /* parse the integer part */
   833      while (i < n && is_digit(s[i])) {
   834          add_integer_to_mantissa(man, man_nd, exp10, (s[i] - '0'))
   835          i++;
   836      }
   837  
   838      if (exp10 > 0) {
   839          trunc = 1;
   840      }
   841  
   842      /* check for decimal points */
   843      if (i < n && s[i] == '.') {
   844          i++;
   845          set_vt(V_DOUBLE)
   846          check_eof()
   847          check_digit()
   848      }
   849  
   850      /* skip the leading zeros of 0.000xxxx */
   851      if (man == 0 && exp10 == 0) {
   852          while (i < n && s[i] == '0') {
   853              i++;
   854              exp10--;
   855          }
   856          man = 0;
   857          man_nd = 0;
   858      }
   859  
   860      /* the fractional part (uint64_t mantissa can represent at most 19 digits) */
   861      while (i < n && man_nd < 19 && is_digit(s[i])) {
   862          add_float_to_mantissa(man, man_nd, exp10, (s[i] - '0'))
   863          i++;
   864      }
   865  
   866       /* skip the remaining digits */
   867      while (i < n && is_digit(s[i])) {
   868          trunc = 1;
   869          i++;
   870      }
   871  
   872      /* check for exponent */
   873      if (i < n && (s[i] == 'e' || s[i] == 'E')) {
   874          int esm = 1;
   875          int exp = 0;
   876  
   877          /* check for the '+' or '-' sign, and parse the power */
   878          i++;
   879          set_vt(V_DOUBLE)
   880          check_eof()
   881          parse_sign(esm)
   882          check_digit()
   883          while (i < n && is_digit(s[i])) {
   884              if (exp < 10000) {
   885                  exp = exp * 10 + (s[i] - '0');
   886              }
   887              i++;
   888          }
   889          exp10 += exp * esm;
   890          goto parse_float;
   891      }
   892  
   893      if (ret->vt == V_INTEGER) {
   894          if (!is_overflow(man, sgn, exp10)) {
   895              ret->iv = (int64_t)man * sgn;
   896  
   897              /* following lines equal to ret->dv = (double)(man) * sgn */
   898              ret->dv = (double)(man);
   899              *(uint64_t *)&ret->dv |= ((uint64_t)(sgn) >> 63 << 63);
   900  
   901              *p = i;
   902              return;
   903          }
   904          set_vt(V_DOUBLE)
   905      }
   906  
   907  parse_float:
   908      /* when fast algorithms failed, use slow fallback.*/
   909      if(!atof_fast(man, exp10, sgn, trunc, &val)) {
   910          val = atof_native_1(s + *p, i - *p, dbuf, dcap);
   911      }
   912  
   913      /* check parsed double val */
   914      if (is_infinity(val)) {
   915          ret->vt = -ERR_FLOAT_INF;
   916      }
   917  
   918      /* update the result */
   919      ret->dv = val;
   920      *p = i;
   921  }
   922  
   923  /** Value Skipping FSM **/
   924  
   925  #define FSM_VAL         0
   926  #define FSM_ARR         1
   927  #define FSM_OBJ         2
   928  #define FSM_KEY         3
   929  #define FSM_ELEM        4
   930  #define FSM_ARR_0       5
   931  #define FSM_OBJ_0       6
   932  
   933  #define FSM_DROP(v)     (v)->sp--
   934  #define FSM_REPL(v, t)  (v)->vt[(v)->sp - 1] = (t)
   935  
   936  #define FSM_CHAR(c)     do { if (ch != (c)) return -ERR_INVAL; } while (0)
   937  #define FSM_XERR(v)     do { long r = (v); if (r < 0) return r; } while (0)
   938  
   939  static always_inline void fsm_init(StateMachine *self, int vt) {
   940      self->sp = 1;
   941      self->vt[0] = vt;
   942  }
   943  
   944  static always_inline long fsm_push(StateMachine *self, int vt) {
   945      if (self->sp >= MAX_RECURSE) {
   946          return -ERR_RECURSE_MAX;
   947      } else {
   948          self->vt[self->sp++] = vt;
   949          return 0;
   950      }
   951  }
   952  
   953  static always_inline long fsm_exec_1(StateMachine *self, const GoString *src, long *p, uint64_t flags) {
   954      int  vt;
   955      char ch;
   956      long vi = -1;
   957  
   958      /* run until no more nested values */
   959      while (self->sp) {
   960          ch = advance_ns(src, p);
   961          if (ch  == 0) {
   962              return -ERR_EOF;
   963          }
   964          vt = self->vt[self->sp - 1];
   965  
   966          /* set the start address if any */
   967          if (vi == -1) {
   968              vi = *p - 1;
   969          }
   970  
   971          /* check for special types */
   972          switch (vt) {
   973              default: {
   974                  FSM_DROP(self);
   975                  break;
   976              }
   977  
   978              /* arrays */
   979              case FSM_ARR: {
   980                  switch (ch) {
   981                      case ']' : FSM_DROP(self);                    continue;
   982                      case ',' : FSM_XERR(fsm_push(self, FSM_VAL)); continue;
   983                      default  : return -ERR_INVAL;
   984                  }
   985              }
   986  
   987              /* objects */
   988              case FSM_OBJ: {
   989                  switch (ch) {
   990                      case '}' : FSM_DROP(self);                    continue;
   991                      case ',' : FSM_XERR(fsm_push(self, FSM_KEY)); continue;
   992                      default  : return -ERR_INVAL;
   993                  }
   994              }
   995  
   996              /* object keys */
   997              case FSM_KEY: {
   998                  FSM_CHAR('"');
   999                  FSM_REPL(self, FSM_ELEM);
  1000                  FSM_XERR(skip_string_1(src, p, flags));
  1001                  continue;
  1002              }
  1003  
  1004              /* object element */
  1005              case FSM_ELEM: {
  1006                  FSM_CHAR(':');
  1007                  FSM_REPL(self, FSM_VAL);
  1008                  continue;
  1009              }
  1010  
  1011              /* arrays, first element */
  1012              case FSM_ARR_0: {
  1013                  if (ch == ']') {
  1014                      FSM_DROP(self);
  1015                      continue;
  1016                  } else {
  1017                      FSM_REPL(self, FSM_ARR);
  1018                      break;
  1019                  }
  1020              }
  1021  
  1022              /* objects, first pair */
  1023              case FSM_OBJ_0: {
  1024                  switch (ch) {
  1025                      default: {
  1026                          return -ERR_INVAL;
  1027                      }
  1028  
  1029                      /* empty object */
  1030                      case '}': {
  1031                          FSM_DROP(self);
  1032                          continue;
  1033                      }
  1034  
  1035                      /* the quote of the first key */
  1036                      case '"': {
  1037                          FSM_REPL(self, FSM_OBJ);
  1038                          FSM_XERR(skip_string_1(src, p, flags));
  1039                          FSM_XERR(fsm_push(self, FSM_ELEM));
  1040                          continue;
  1041                      }
  1042                  }
  1043              }
  1044          }
  1045  
  1046          /* simple values */
  1047          switch (ch) {
  1048              case '0' : /* fallthrough */
  1049              case '1' : /* fallthrough */
  1050              case '2' : /* fallthrough */
  1051              case '3' : /* fallthrough */
  1052              case '4' : /* fallthrough */
  1053              case '5' : /* fallthrough */
  1054              case '6' : /* fallthrough */
  1055              case '7' : /* fallthrough */
  1056              case '8' : /* fallthrough */
  1057              case '9' : FSM_XERR(skip_positive_1(src, p));                     break;
  1058              case '-' : FSM_XERR(skip_negative_1(src, p));                     break;
  1059              case 'n' : FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_NULL)); break;
  1060              case 't' : FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_TRUE)); break;
  1061              case 'f' : FSM_XERR(advance_dword(src, p, 0, *p - 1, VS_ALSE)); break;
  1062              case '[' : FSM_XERR(fsm_push(self, FSM_ARR_0));                 break;
  1063              case '{' : FSM_XERR(fsm_push(self, FSM_OBJ_0));                 break;
  1064              case '"' : FSM_XERR(skip_string_1(src, p, flags));                break;
  1065              case  0  : return -ERR_EOF;
  1066              default  : return -ERR_INVAL;
  1067          }
  1068      }
  1069  
  1070      /* all done */
  1071      return vi;
  1072  }
  1073  
  1074  #undef FSM_DROP
  1075  #undef FSM_REPL
  1076  #undef FSM_CHAR
  1077  #undef FSM_XERR
  1078  
  1079  #define check_bits(mv)                              \
  1080      if (unlikely((v = mv & (mv - 1)) != 0)) {       \
  1081          return -(sp - ss + __builtin_ctz(v) + 1);   \
  1082      }
  1083  
  1084  #define check_sidx(iv)          \
  1085      if (likely(iv == -1)) {     \
  1086          iv = sp - ss - 1;       \
  1087      } else {                    \
  1088          return -(sp - ss);      \
  1089      }
  1090  
  1091  #define check_vidx(iv, mv)                              \
  1092      if (mv != 0) {                                      \
  1093          if (likely(iv == -1)) {                         \
  1094              iv = sp - ss + __builtin_ctz(mv);           \
  1095          } else {                                        \
  1096              return -(sp - ss + __builtin_ctz(mv) + 1);  \
  1097          }                                               \
  1098      }
  1099  
  1100  static always_inline long do_skip_number(const char *sp, size_t nb) {
  1101      long         di = -1;
  1102      long         ei = -1;
  1103      long         si = -1;
  1104      const char * ss = sp;
  1105  
  1106      /* check for EOF */
  1107      if (nb == 0) {
  1108          return -1;
  1109      }
  1110  
  1111      /* special case of '0' */
  1112      if (*sp == '0' && (nb == 1 || (sp[1] != '.' && sp[1] != 'e' && sp[1] != 'E'))) {
  1113          return 1;
  1114      }
  1115  
  1116  #if USE_AVX2
  1117      /* can do with AVX-2 */
  1118      if (likely(nb >= 32)) {
  1119          __m256i d9 = _mm256_set1_epi8('9');
  1120          __m256i ds = _mm256_set1_epi8('/');
  1121          __m256i dp = _mm256_set1_epi8('.');
  1122          __m256i el = _mm256_set1_epi8('e');
  1123          __m256i eu = _mm256_set1_epi8('E');
  1124          __m256i xp = _mm256_set1_epi8('+');
  1125          __m256i xm = _mm256_set1_epi8('-');
  1126  
  1127          /* 32-byte loop */
  1128          do {
  1129              __m256i sb = _mm256_loadu_si256  ((const void *)sp);
  1130              __m256i i0 = _mm256_cmpgt_epi8   (sb, ds);
  1131              __m256i i9 = _mm256_cmpgt_epi8   (sb, d9);
  1132              __m256i id = _mm256_cmpeq_epi8   (sb, dp);
  1133              __m256i il = _mm256_cmpeq_epi8   (sb, el);
  1134              __m256i iu = _mm256_cmpeq_epi8   (sb, eu);
  1135              __m256i ip = _mm256_cmpeq_epi8   (sb, xp);
  1136              __m256i im = _mm256_cmpeq_epi8   (sb, xm);
  1137              __m256i iv = _mm256_andnot_si256 (i9, i0);
  1138              __m256i ie = _mm256_or_si256     (il, iu);
  1139              __m256i is = _mm256_or_si256     (ip, im);
  1140              __m256i rt = _mm256_or_si256     (iv, id);
  1141              __m256i ru = _mm256_or_si256     (ie, is);
  1142              __m256i rv = _mm256_or_si256     (rt, ru);
  1143  
  1144              /* exponent and sign position */
  1145              uint32_t md = _mm256_movemask_epi8(id);
  1146              uint32_t me = _mm256_movemask_epi8(ie);
  1147              uint32_t ms = _mm256_movemask_epi8(is);
  1148              uint32_t mr = _mm256_movemask_epi8(rv);
  1149  
  1150              /* mismatch position */
  1151              uint32_t v;
  1152              uint32_t i = __builtin_ctzll(~(uint64_t)mr | 0x0100000000);
  1153  
  1154              /* mask out excess characters */
  1155              if (i != 32) {
  1156                  md &= (1 << i) - 1;
  1157                  me &= (1 << i) - 1;
  1158                  ms &= (1 << i) - 1;
  1159              }
  1160  
  1161              /* check & update decimal point, exponent and sign index */
  1162              check_bits(md)
  1163              check_bits(me)
  1164              check_bits(ms)
  1165              check_vidx(di, md)
  1166              check_vidx(ei, me)
  1167              check_vidx(si, ms)
  1168  
  1169              /* check for valid number */
  1170              if (i != 32) {
  1171                  sp += i;
  1172                  _mm256_zeroupper();
  1173                  goto check_index;
  1174              }
  1175  
  1176              /* move to next block */
  1177              sp += 32;
  1178              nb -= 32;
  1179          } while (nb >= 32);
  1180  
  1181          /* clear the upper half to prevent AVX-SSE transition penalty */
  1182          _mm256_zeroupper();
  1183      }
  1184  #endif
  1185  
  1186      /* can do with SSE */
  1187      if (likely(nb >= 16)) {
  1188          __m128i dc = _mm_set1_epi8(':');
  1189          __m128i ds = _mm_set1_epi8('/');
  1190          __m128i dp = _mm_set1_epi8('.');
  1191          __m128i el = _mm_set1_epi8('e');
  1192          __m128i eu = _mm_set1_epi8('E');
  1193          __m128i xp = _mm_set1_epi8('+');
  1194          __m128i xm = _mm_set1_epi8('-');
  1195  
  1196          /* 16-byte loop */
  1197          do {
  1198              __m128i sb = _mm_loadu_si128 ((const void *)sp);
  1199              __m128i i0 = _mm_cmpgt_epi8  (sb, ds);
  1200              __m128i i9 = _mm_cmplt_epi8  (sb, dc);
  1201              __m128i id = _mm_cmpeq_epi8  (sb, dp);
  1202              __m128i il = _mm_cmpeq_epi8  (sb, el);
  1203              __m128i iu = _mm_cmpeq_epi8  (sb, eu);
  1204              __m128i ip = _mm_cmpeq_epi8  (sb, xp);
  1205              __m128i im = _mm_cmpeq_epi8  (sb, xm);
  1206              __m128i iv = _mm_and_si128   (i9, i0);
  1207              __m128i ie = _mm_or_si128    (il, iu);
  1208              __m128i is = _mm_or_si128    (ip, im);
  1209              __m128i rt = _mm_or_si128    (iv, id);
  1210              __m128i ru = _mm_or_si128    (ie, is);
  1211              __m128i rv = _mm_or_si128    (rt, ru);
  1212  
  1213              /* exponent and sign position */
  1214              uint32_t md = _mm_movemask_epi8(id);
  1215              uint32_t me = _mm_movemask_epi8(ie);
  1216              uint32_t ms = _mm_movemask_epi8(is);
  1217              uint32_t mr = _mm_movemask_epi8(rv);
  1218  
  1219              /* mismatch position */
  1220              uint32_t v;
  1221              uint32_t i = __builtin_ctzll(~mr | 0x00010000);
  1222  
  1223              /* mask out excess characters */
  1224              if (i != 16) {
  1225                  md &= (1 << i) - 1;
  1226                  me &= (1 << i) - 1;
  1227                  ms &= (1 << i) - 1;
  1228              }
  1229  
  1230              /* check & update exponent and sign index */
  1231              check_bits(md)
  1232              check_bits(me)
  1233              check_bits(ms)
  1234              check_vidx(di, md)
  1235              check_vidx(ei, me)
  1236              check_vidx(si, ms)
  1237  
  1238              /* check for valid number */
  1239              if (i != 16) {
  1240                  sp += i;
  1241                  goto check_index;
  1242              }
  1243  
  1244              /* move to next block */
  1245              sp += 16;
  1246              nb -= 16;
  1247          } while (nb >= 16);
  1248      }
  1249  
  1250      /* remaining bytes, do with scalar code */
  1251      while (likely(nb-- > 0)) {
  1252          switch (*sp++) {
  1253              case '0' : /* fallthrough */
  1254              case '1' : /* fallthrough */
  1255              case '2' : /* fallthrough */
  1256              case '3' : /* fallthrough */
  1257              case '4' : /* fallthrough */
  1258              case '5' : /* fallthrough */
  1259              case '6' : /* fallthrough */
  1260              case '7' : /* fallthrough */
  1261              case '8' : /* fallthrough */
  1262              case '9' : break;
  1263              case '.' : check_sidx(di); break;
  1264              case 'e' : /* fallthrough */
  1265              case 'E' : check_sidx(ei); break;
  1266              case '+' : /* fallthrough */
  1267              case '-' : check_sidx(si); break;
  1268              default  : sp--; goto check_index;
  1269          }
  1270      }
  1271  check_index:
  1272      if (di == 0 || si == 0 || ei == 0) {
  1273          return -1;
  1274      } else if (di == sp - ss - 1|| si == sp - ss - 1 || ei == sp - ss - 1) {
  1275          return -(sp - ss);
  1276      } else if (si > 0 && ei != si - 1) {
  1277          return -si - 1;
  1278      } else if (di >= 0 && ei >= 0 && di > ei - 1) {
  1279          return -di - 1;
  1280      } else if (di >= 0 && ei >= 0 && di == ei - 1) {
  1281          return -ei - 1;
  1282      } else {
  1283          return sp - ss;
  1284      }
  1285  }
  1286  
  1287  #undef check_bits
  1288  #undef check_sidx
  1289  #undef check_vidx
  1290  
  1291  static always_inline long skip_string_1(const GoString *src, long *p, uint64_t flags) {
  1292      int64_t v = -1;
  1293      ssize_t q = *p - 1; // start position
  1294      ssize_t e = advance_string(src, *p, &v, flags);
  1295  
  1296      /* check for errors */
  1297      if (e < 0) {
  1298          *p = e == -ERR_EOF ? src->len : v;
  1299          return e;
  1300      }
  1301  
  1302      /* update the position */
  1303      *p = e;
  1304      return q;
  1305  }
  1306  
  1307  static always_inline long skip_negative_1(const GoString *src, long *p) {
  1308      long i = *p;
  1309      long r = do_skip_number(src->buf + i, src->len - i);
  1310  
  1311      /* check for errors */
  1312      if (r < 0) {
  1313          *p -= r + 1;
  1314          return -ERR_INVAL;
  1315      }
  1316  
  1317      /* update value pointer */
  1318      *p += r;
  1319      return i - 1;
  1320  }
  1321  
  1322  static always_inline long skip_positive_1(const GoString *src, long *p) {
  1323      long i = *p - 1;
  1324      long r = do_skip_number(src->buf + i, src->len - i);
  1325  
  1326      /* check for errors */
  1327      if (r < 0) {
  1328          *p -= r + 2;
  1329          return -ERR_INVAL;
  1330      }
  1331  
  1332      /* update value pointer */
  1333      *p += r - 1;
  1334      return i;
  1335  }
  1336  
  1337  static always_inline long skip_number_1(const GoString *src, long *p) {
  1338      const char* ss = src->buf;
  1339      const char* sp = src->buf + *p;
  1340      size_t nb = src->len - *p;
  1341      long i = *p;
  1342      long r;
  1343      bool neg = *sp == '-';
  1344  
  1345      sp += neg;
  1346      nb -= neg;
  1347      if (unlikely(nb <= 0)) {
  1348          *p = sp - ss;
  1349          return -ERR_EOF;
  1350      }
  1351  
  1352      if (unlikely(nb > 0 && (*sp > '9' || *sp < '0'))) {
  1353          *p = sp - ss;
  1354          return -ERR_INVAL;
  1355      }
  1356  
  1357      r = do_skip_number(sp, nb);
  1358      if (unlikely(r < 0)) {
  1359          *p = sp - (r + 1) - ss;
  1360          return -ERR_INVAL;
  1361      }
  1362      *p = sp + r - ss;
  1363      return i;
  1364  }
  1365  
  1366  static always_inline long skip_one_1(const GoString *src, long *p, StateMachine *m, uint64_t flags) {
  1367      fsm_init(m, FSM_VAL);
  1368      return fsm_exec_1(m, src, p, flags);
  1369  }
  1370  
  1371  static always_inline uint64_t get_maskx64(const char *s, char c) {
  1372  #if USE_AVX2
  1373      __m256i v0 = _mm256_loadu_si256((__m256i const *)s);
  1374      __m256i v1 = _mm256_loadu_si256((__m256i const *)(s + 32));
  1375      uint32_t m0 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v0, _mm256_set1_epi8(c)));
  1376      uint32_t m1 = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, _mm256_set1_epi8(c))); 
  1377      return ((uint64_t)(m1) << 32) | (uint64_t)(m0);
  1378  #else
  1379      __m128i v0 = _mm_loadu_si128((__m128i const*)s);
  1380      __m128i v1 = _mm_loadu_si128((__m128i const*)(s + 16));
  1381      __m128i v2 = _mm_loadu_si128((__m128i const*)(s + 32));
  1382      __m128i v3 = _mm_loadu_si128((__m128i const*)(s + 48));
  1383      uint32_t m0 = _mm_movemask_epi8(_mm_cmpeq_epi8(v0, _mm_set1_epi8(c)));
  1384      uint32_t m1 = _mm_movemask_epi8(_mm_cmpeq_epi8(v1, _mm_set1_epi8(c)));
  1385      uint32_t m2 = _mm_movemask_epi8(_mm_cmpeq_epi8(v2, _mm_set1_epi8(c)));
  1386      uint32_t m3 = _mm_movemask_epi8(_mm_cmpeq_epi8(v3, _mm_set1_epi8(c)));
  1387      return ((uint64_t)(m3) << 48) | ((uint64_t)(m2)  << 32) | ((uint64_t)(m1) << 16) | (uint64_t)(m0);
  1388  #endif
  1389  }
  1390  
  1391  static always_inline uint64_t get_maskx32(const char *s, char c) {
  1392  #if USE_AVX2
  1393      __m256i v0 = _mm256_loadu_si256((__m256i const *)s);
  1394      uint64_t m0 = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v0, _mm256_set1_epi8(c)));
  1395      return m0;
  1396  #else
  1397      __m128i v0 = _mm_loadu_si128((__m128i const*)s);
  1398      __m128i v1 = _mm_loadu_si128((__m128i const*)(s + 16));
  1399      uint64_t m0 = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(v0, _mm_set1_epi8(c)));
  1400      uint64_t m1 = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, _mm_set1_epi8(c)));
  1401      return m0 | (m1 << 16);
  1402  #endif
  1403  }
  1404  
  1405  // get the string (besides in quote) mask
  1406  static always_inline uint64_t get_string_maskx64(const char *s, uint64_t *prev_inquote, uint64_t *prev_bs) {
  1407      uint64_t escaped = *prev_bs;
  1408      uint64_t quote_mask = 0, bs_mask = 0;
  1409  
  1410      /* read and get the quote or backslash bitmask */
  1411      quote_mask = get_maskx64(s, '"');
  1412      bs_mask = get_maskx64(s, '\\');
  1413  
  1414      /* get the escaped bitmask */
  1415      if (bs_mask || *prev_bs) {
  1416          bs_mask &= ~(*prev_bs);
  1417          uint64_t follow_bs = (bs_mask << 1) | *prev_bs;
  1418          uint64_t bs_start = bs_mask & ~follow_bs;
  1419          uint64_t odd_start = bs_start & ODD_MASK;
  1420          uint64_t even_or_oc = add64(odd_start, bs_mask, prev_bs);
  1421          uint64_t even_or_escaped = (even_or_oc << 1) ^ EVEN_MASK;
  1422          escaped = follow_bs & even_or_escaped;
  1423      } else {
  1424          *prev_bs = 0;
  1425      }
  1426      quote_mask &= ~escaped;
  1427  
  1428      /* get the inquote bitmask */
  1429      uint64_t inquote = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_set_epi64x(0, quote_mask), _mm_set1_epi8('\xFF'), 0));
  1430      inquote ^= *prev_inquote;
  1431      *prev_inquote = (uint64_t)(((int64_t)(inquote)) >> 63);
  1432      return inquote;
  1433  }
  1434  
  1435  // get the next json structural, '}', ']' or ','。
  1436  #if USE_AVX2
  1437  static always_inline int get_structural_maskx32(const char *s) {
  1438      __m256i v = _mm256_loadu_si256((const void *)s);
  1439      __m256i e1 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8('}'));
  1440      __m256i e2 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8(']'));
  1441      __m256i e3 = _mm256_cmpeq_epi8(v, _mm256_set1_epi8(','));
  1442      __m256i sv = _mm256_or_si256(_mm256_or_si256(e1, e2), e3);
  1443      return _mm256_movemask_epi8(sv);
  1444  }
  1445  #endif
  1446  
  1447  static always_inline int get_structural_maskx16(const char *s) {
  1448      __m128i v = _mm_loadu_si128((const void *)s);
  1449      __m128i e1 = _mm_cmpeq_epi8(v, _mm_set1_epi8('}'));
  1450      __m128i e2 = _mm_cmpeq_epi8(v, _mm_set1_epi8(']'));
  1451      __m128i e3 = _mm_cmpeq_epi8(v, _mm_set1_epi8(','));
  1452      __m128i sv = _mm_or_si128(_mm_or_si128(e1, e2), e3);
  1453      return _mm_movemask_epi8(sv);
  1454  }
  1455  
  1456  // skip the number at the next '}', ']' or ',' or the ending of json.
  1457  static always_inline long skip_number_fast(const GoString *src, long *p) {
  1458      size_t nb = src->len - *p;
  1459      const char *s = src->buf + *p;
  1460      long vi = *p - 1;
  1461      int m = 0;
  1462  
  1463  #if USE_AVX2
  1464      while (likely(nb >= 32)) {
  1465          if ((m = get_structural_maskx32(s))) {
  1466              *p = s - src->buf + __builtin_ctzll(m);
  1467              return vi;
  1468          }
  1469          s += 32, nb -= 32;
  1470      }
  1471  #endif
  1472  
  1473      while (likely(nb >= 16)) {
  1474          if ((m = get_structural_maskx16(s))) {
  1475              *p = s - src->buf + __builtin_ctzll(m);
  1476              return vi;
  1477          }
  1478          s += 16, nb -= 16;
  1479      }
  1480  
  1481      while (likely(nb > 0)) {
  1482          if (*s == '}' || *s == ']' || *s == ',') {
  1483              *p = s - src->buf;
  1484              return vi;
  1485          }
  1486          s++, nb--;
  1487      }
  1488      *p = s - src->buf;
  1489      return vi;
  1490  }
  1491  
  1492  static always_inline long skip_container_fast(const GoString *src, long *p, char lc, char rc) {
  1493      long nb = src->len - *p;
  1494      const char *s = src->buf + *p;
  1495      long vi = *p - 1;
  1496  
  1497      uint64_t prev_inquote = 0, prev_bs = 0;
  1498      uint64_t lbrace = 0, rbrace = 0;
  1499      size_t lnum = 0, rnum = 0, last_lnum = 0;
  1500      uint64_t inquote = 0;
  1501  
  1502      while (likely(nb >= 64)) {
  1503  skip:
  1504          inquote = get_string_maskx64(s, &prev_inquote, &prev_bs);
  1505          lbrace = get_maskx64(s, lc) & ~inquote;
  1506          rbrace = get_maskx64(s, rc) & ~inquote;
  1507  
  1508          /* traverse each right brace */
  1509          last_lnum = lnum;
  1510          while (rbrace > 0) {
  1511              uint64_t lbrace_first = (rbrace - 1) & lbrace;
  1512              lnum = last_lnum + __builtin_popcountll((int64_t)lbrace_first);
  1513              bool is_closed = lnum <= rnum;
  1514              if (is_closed) {
  1515                  *p = src->len - nb + __builtin_ctzll(rbrace) + 1;
  1516                  // *p is out-of-bound access here
  1517                  if (*p > src->len) {
  1518                      *p = src->len;
  1519                      return -ERR_EOF;
  1520                  }
  1521                  return vi;
  1522              }
  1523              rbrace &= (rbrace - 1); // clear the lowest right brace
  1524              rnum ++;
  1525          }
  1526          lnum = last_lnum + __builtin_popcountll((int64_t)lbrace);
  1527          s += 64, nb -= 64;
  1528      }
  1529  
  1530      if (nb <= 0) {
  1531          *p = src->len;
  1532          return -ERR_EOF;
  1533      }
  1534  
  1535      char tbuf[64] = {0};
  1536      bool cross_page = vec_cross_page(s, 64);
  1537      if (cross_page) {
  1538          memcpy_p64(tbuf, s, nb);
  1539          s = tbuf;
  1540      }
  1541      goto skip;
  1542  }
  1543  
  1544  static always_inline long skip_object_fast(const GoString *src, long *p) {
  1545      return skip_container_fast(src, p, '{', '}');
  1546  }
  1547  
  1548  static always_inline long skip_array_fast(const GoString *src, long *p) {
  1549      return skip_container_fast(src, p, '[', ']');
  1550  }
  1551  
  1552  static always_inline long skip_string_fast(const GoString *src, long *p) {
  1553      const char* s = src->buf + *p;
  1554      long nb = src->len - *p;
  1555      long vi = *p - 1;
  1556      uint64_t prev_bs = 0, escaped;
  1557  
  1558      while (likely(nb >= 32)) {
  1559          uint32_t quote = get_maskx32(s, '"');
  1560          uint32_t bs_mask = get_maskx32(s, '\\');
  1561          if (bs_mask || prev_bs) {
  1562              bs_mask &= ~prev_bs;
  1563              uint64_t follow_bs = (bs_mask << 1) | prev_bs;
  1564              uint64_t bs_start = bs_mask & ~follow_bs;
  1565              uint64_t odd_start = bs_start & ODD_MASK;
  1566              uint64_t even_or_oc = add32(odd_start, bs_mask, &prev_bs);
  1567              uint64_t even_or_escaped = (even_or_oc << 1) ^ EVEN_MASK;
  1568              escaped = follow_bs & even_or_escaped;
  1569              quote &= ~escaped;
  1570          }
  1571          if (quote) {
  1572              *p = s + __builtin_ctzll(quote) + 1 - src->buf;
  1573              return vi;
  1574          }
  1575          s += 32;
  1576          nb -= 32;
  1577      }
  1578  
  1579      if (unlikely(prev_bs != 0)) {
  1580          if (nb == 0) return -ERR_EOF;
  1581          s++, nb--;
  1582      }
  1583  
  1584      while (likely(nb > 0)) {
  1585          if (*s == '\\') {
  1586              s += 2, nb -= 2;
  1587              continue;
  1588          }
  1589          if (*s == '"') {
  1590              *p = s - src->buf + 1;
  1591              return vi;
  1592          }
  1593          s++, nb--;
  1594      }
  1595      return -ERR_EOF;
  1596  }
  1597  
  1598  static always_inline long skip_one_fast_1(const GoString *src, long *p) {
  1599      char c = advance_ns(src, p);
  1600      /* set the start address */
  1601      long vi = *p - 1;
  1602      switch (c) {
  1603          case '[': return skip_array_fast(src, p);
  1604          case '{': return skip_object_fast(src, p);
  1605          case '"': return skip_string_fast(src, p);
  1606          case '-': case '0' ... '9': return skip_number_fast(src, p);
  1607          case 't': case 'n': { if (*p + 3 <= src->len) { *p += 3; } else { return -ERR_EOF; } }; break;
  1608          case 'f': { if (*p + 4 <= src->len) { *p += 4; } else { return -ERR_EOF; } }; break;
  1609          case  0 : return -ERR_EOF;
  1610          default : *p -= 1; return -ERR_INVAL; // backward error position
  1611      }
  1612      return vi;
  1613  }
  1614  
  1615  
  1616  static always_inline GoKind kind(const GoIface* iface) {
  1617      return (iface->type->kind_flags) &  GO_KIND_MASK;
  1618  }
  1619  
  1620  static always_inline bool is_int(const GoIface* iface) {
  1621      return iface->type != NULL && kind(iface) == Int;
  1622  }
  1623  
  1624  static always_inline bool is_str(const GoIface* iface) {
  1625      return iface->type != NULL && kind(iface) == String;
  1626  }
  1627  
  1628  static always_inline GoString get_str(const GoIface* iface) {
  1629      return *(GoString*)(iface->value);
  1630  }
  1631  
  1632  static always_inline int64_t get_int(const GoIface* iface) {
  1633      return *(int64_t*)(iface->value);
  1634  }
  1635  
  1636  // xmemcmpeq return true if s1 and s2 is equal for the n bytes, otherwise, return false.
  1637  static always_inline bool xmemcmpeq(const char * s1, const char * s2, size_t n) {
  1638      bool c1, c2;
  1639  #if USE_AVX2
  1640      while (n >= 32) {
  1641          __m256i  v1   = _mm256_loadu_si256((const void *)s1);
  1642          __m256i  v2   = _mm256_loadu_si256((const void *)s2);
  1643          uint32_t mask = ~((uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)));
  1644          if (mask) return false;
  1645          s1 += 32;
  1646          s2 += 32;
  1647          n  -= 32;
  1648      };
  1649      c1 = vec_cross_page(s1, 32);
  1650      c2 = vec_cross_page(s2, 32);
  1651      // not cross page
  1652      if (!c1 && !c2) {
  1653          __m256i  v1   = _mm256_loadu_si256((const void *)s1);
  1654          __m256i  v2   = _mm256_loadu_si256((const void *)s2);
  1655          uint32_t mask = ~((uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)));
  1656          bool eq = (mask == 0) || (__builtin_ctzll(mask) >= n);
  1657          return eq;
  1658      }
  1659  #endif
  1660      while (n >= 16) {
  1661          __m128i  v1   = _mm_loadu_si128((const void *)s1);
  1662          __m128i  v2   = _mm_loadu_si128((const void *)s2);
  1663          uint16_t mask = ~((uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)));
  1664          if (mask != 0) return false;
  1665          s1 += 16;
  1666          s2 += 16;
  1667          n  -= 16;
  1668      };
  1669      c1 = vec_cross_page(s1, 16);
  1670      c2 = vec_cross_page(s2, 16);
  1671      // not cross page
  1672      if (!c1 && !c2) {
  1673          __m128i  v1   = _mm_loadu_si128((const void *)s1);
  1674          __m128i  v2   = _mm_loadu_si128((const void *)s2);
  1675          uint16_t mask = ~((uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)));
  1676          bool eq = (mask == 0) || (__builtin_ctzll(mask) >= n);
  1677          return eq;
  1678      }
  1679      // cross page
  1680      while (n > 0 && *s1++ == *s2++) n--;
  1681      return n == 0;
  1682  }
  1683  
  1684  // match_key return negative if errors, zero if not matched, one if matched.
  1685  static always_inline long match_key(const GoString *src, long *p, const GoString key) {
  1686      static const long not_match = 0;
  1687      int64_t v = -1;
  1688      long si = *p;
  1689      long se = advance_string_default(src, *p, &v);
  1690      if (unlikely(se < 0)) {
  1691          *p = src->len;
  1692          return -ERR_EOF;
  1693      }
  1694  
  1695      /* update position */
  1696      *p = se;
  1697  
  1698      /* compare non-escaped strings */
  1699      if (likely(v == -1 || v > se)) {
  1700          long sn = se - si - 1;
  1701  
  1702          // check empty keys
  1703          if (!sn && !key.len) {
  1704              return true;
  1705          }
  1706          
  1707          return sn == key.len && xmemcmpeq(src->buf + si, key.buf, key.len);
  1708      }
  1709  
  1710      /* deal with escaped strings */
  1711      char buf[8] = {0}; // escaped buffer
  1712      const char* sp = src->buf + si;
  1713      const char* end = src->buf + se - 1;
  1714      const char* kp = key.buf;
  1715      const char* ke = key.buf + key.len;
  1716      while (sp < end && kp < ke) {
  1717          if (*sp == '\\') {
  1718              long en = unescape(&sp, end, buf);
  1719              if (en < 0) {
  1720                  *p = sp - src->buf;
  1721                  return en;
  1722              }
  1723              const char* ee = buf + en;
  1724              const char* ep = buf;
  1725              while (kp < ke && ep < ee && *kp == *ep) kp++, ep++;
  1726              if (ep != ee) {
  1727                  return not_match;
  1728              }
  1729          } else if (*sp == *kp) {
  1730              sp++, kp++; 
  1731          } else {
  1732              return not_match;
  1733          }
  1734      };
  1735      return sp == end && kp == ke;
  1736  }