github.com/cloudwego/dynamicgo@v0.2.6-0.20240519101509-707f41b6b834/native/scanning.c (about)

     1  /*
     2   * Copyright 2023 CloudWeGo Authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "native.h"
    18  #include "scanning.h"
    19  
    20  static const char *CS_ARRAY = "[]{},\"[]{},\"[]{}";
    21  static const char *CS_OBJECT = "[]{},:\"[]{}:,\"[]";
    22  
    23  static const uint64_t ODD_MASK = 0xaaaaaaaaaaaaaaaa;
    24  static const uint64_t EVEN_MASK = 0x5555555555555555;
    25  
    26  static const double P10_TAB[23] = {
    27      /* <= the connvertion to double is not exact when less than 1 => */ 1e-000,
    28      1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010,
    29      1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020,
    30      1e+021, 1e+022 /* <= the connvertion to double is not exact when larger,  => */
    31  };
    32  
    33  static inline uint64_t add32(uint64_t v1, uint64_t v2, uint64_t *vo)
    34  {
    35      uint32_t v;
    36      uint32_t c = __builtin_uadd_overflow((uint32_t)v1, (uint32_t)v2, &v);
    37  
    38      /* set the carry */
    39      *vo = c;
    40      return v;
    41  }
    42  
    43  static inline uint64_t add64(uint64_t v1, uint64_t v2, uint64_t *vo)
    44  {
    45      uint64_t v;
    46      uint64_t c = __builtin_uaddll_overflow(v1, v2, &v);
    47  
    48      /* set the carry */
    49      *vo = c;
    50      return v;
    51  }
    52  
    53  static inline char isspace(char ch)
    54  {
    55      return ch == ' ' || ch == '\r' || ch == '\n' | ch == '\t';
    56  }
    57  
    58  static inline void vdigits(const GoString *src, long *p, JsonState *ret)
    59  {
    60      --*p;
    61      vnumber(src, p, ret);
    62  }
    63  
    64  char advance_ns(const GoString *src, long *p)
    65  {
    66      size_t vi = *p;
    67      size_t nb = src->len;
    68      const char *sp = src->buf;
    69  
    70      /* it's likely to run into non-spaces within a few
    71       * characters, so test up to 4 characters manually */
    72      if (vi < nb && !isspace(sp[vi]))
    73          goto nospace;
    74      else
    75          vi++;
    76      if (vi < nb && !isspace(sp[vi]))
    77          goto nospace;
    78      else
    79          vi++;
    80      if (vi < nb && !isspace(sp[vi]))
    81          goto nospace;
    82      else
    83          vi++;
    84      if (vi < nb && !isspace(sp[vi]))
    85          goto nospace;
    86      else
    87          vi++;
    88  
    89      /* check EOF */
    90      if (vi >= nb)
    91      {
    92          *p = vi;
    93          return 0;
    94      }
    95  
    96      /* too many spaces, use SIMD to search for characters */
    97      if ((vi = lspace(sp, nb, vi)) >= nb)
    98      {
    99          return 0;
   100      }
   101  
   102  nospace:
   103      *p = vi + 1;
   104      return src->buf[vi];
   105  }
   106  
   107  int64_t advance_dword(const GoString *src, long *p, long dec, int64_t ret, uint32_t val)
   108  {
   109      if (*p > src->len + dec - 4)
   110      {
   111          *p = src->len;
   112          return -ERR_EOF;
   113      }
   114      else if (*(uint32_t *)(src->buf + *p - dec) == val)
   115      {
   116          *p += 4 - dec;
   117          return ret;
   118      }
   119      else
   120      {
   121          *p -= dec;
   122          for (int i = 0; src->buf[*p] == (val & 0xff); i++, ++*p)
   123          {
   124              val >>= 8;
   125          }
   126          return -ERR_INVAL;
   127      }
   128  }
   129  
   130  static inline ssize_t advance_string(const GoString *src, long p, int64_t *ep)
   131  {
   132      char ch;
   133      uint64_t es;
   134      uint64_t fe;
   135      uint64_t os;
   136      uint64_t m0;
   137      uint64_t m1;
   138      uint64_t cr = 0;
   139  
   140      /* prevent out-of-bounds accessing */
   141      if (unlikely(src->len == p))
   142      {
   143          return -ERR_EOF;
   144      }
   145  
   146      /* buffer pointers */
   147      size_t nb = src->len;
   148      const char *sp = src->buf;
   149      const char *ss = src->buf;
   150  
   151  #define ep_init() *ep = -1;
   152  #define ep_setc() ep_setx(sp - ss - 1)
   153  #define ep_setx(x) \
   154      if (*ep == -1) \
   155      {              \
   156          *ep = (x); \
   157      }
   158  
   159      /* seek to `p` */
   160      nb -= p;
   161      sp += p;
   162      ep_init()
   163  
   164  #if USE_AVX2
   165          /* initialize vectors */
   166          __m256i v0;
   167      __m256i v1;
   168      __m256i q0;
   169      __m256i q1;
   170      __m256i x0;
   171      __m256i x1;
   172      __m256i cq = _mm256_set1_epi8('"');
   173      __m256i cx = _mm256_set1_epi8('\\');
   174  
   175      /* partial masks */
   176      uint32_t s0;
   177      uint32_t s1;
   178      uint32_t t0;
   179      uint32_t t1;
   180  #else
   181          /* initialize vectors */
   182          __m128i v0;
   183      __m128i v1;
   184      __m128i v2;
   185      __m128i v3;
   186      __m128i q0;
   187      __m128i q1;
   188      __m128i q2;
   189      __m128i q3;
   190      __m128i x0;
   191      __m128i x1;
   192      __m128i x2;
   193      __m128i x3;
   194      __m128i cq = _mm_set1_epi8('"');
   195      __m128i cx = _mm_set1_epi8('\\');
   196  
   197      /* partial masks */
   198      uint32_t s0;
   199      uint32_t s1;
   200      uint32_t s2;
   201      uint32_t s3;
   202      uint32_t t0;
   203      uint32_t t1;
   204      uint32_t t2;
   205      uint32_t t3;
   206  #endif
   207  
   208  #define m0_mask(add)            \
   209      m1 &= ~cr;                  \
   210      fe = (m1 << 1) | cr;        \
   211      os = (m1 & ~fe) & ODD_MASK; \
   212      es = add(os, m1, &cr) << 1; \
   213      m0 &= ~(fe & (es ^ EVEN_MASK));
   214  
   215      /* 64-byte SIMD loop */
   216      while (likely(nb >= 64))
   217      {
   218  #if USE_AVX2
   219          v0 = _mm256_loadu_si256((const void *)(sp + 0));
   220          v1 = _mm256_loadu_si256((const void *)(sp + 32));
   221          q0 = _mm256_cmpeq_epi8(v0, cq);
   222          q1 = _mm256_cmpeq_epi8(v1, cq);
   223          x0 = _mm256_cmpeq_epi8(v0, cx);
   224          x1 = _mm256_cmpeq_epi8(v1, cx);
   225          s0 = _mm256_movemask_epi8(q0);
   226          s1 = _mm256_movemask_epi8(q1);
   227          t0 = _mm256_movemask_epi8(x0);
   228          t1 = _mm256_movemask_epi8(x1);
   229          m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
   230          m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
   231  #else
   232          v0 = _mm_loadu_si128((const void *)(sp + 0));
   233          v1 = _mm_loadu_si128((const void *)(sp + 16));
   234          v2 = _mm_loadu_si128((const void *)(sp + 32));
   235          v3 = _mm_loadu_si128((const void *)(sp + 48));
   236          q0 = _mm_cmpeq_epi8(v0, cq);
   237          q1 = _mm_cmpeq_epi8(v1, cq);
   238          q2 = _mm_cmpeq_epi8(v2, cq);
   239          q3 = _mm_cmpeq_epi8(v3, cq);
   240          x0 = _mm_cmpeq_epi8(v0, cx);
   241          x1 = _mm_cmpeq_epi8(v1, cx);
   242          x2 = _mm_cmpeq_epi8(v2, cx);
   243          x3 = _mm_cmpeq_epi8(v3, cx);
   244          s0 = _mm_movemask_epi8(q0);
   245          s1 = _mm_movemask_epi8(q1);
   246          s2 = _mm_movemask_epi8(q2);
   247          s3 = _mm_movemask_epi8(q3);
   248          t0 = _mm_movemask_epi8(x0);
   249          t1 = _mm_movemask_epi8(x1);
   250          t2 = _mm_movemask_epi8(x2);
   251          t3 = _mm_movemask_epi8(x3);
   252          m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
   253          m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
   254  #endif
   255  
   256          /** update first quote position */
   257          if (unlikely(m1 != 0))
   258          {
   259              ep_setx(sp - ss + __builtin_ctzll(m1))
   260          }
   261  
   262          /** mask all the escaped quotes */
   263          if (unlikely(m1 != 0 || cr != 0))
   264          {
   265              m0_mask(add64)
   266          }
   267  
   268          /* check for end quote */
   269          if (m0 != 0)
   270          {
   271              return sp - ss + __builtin_ctzll(m0) + 1;
   272          }
   273  
   274          /* move to the next block */
   275          sp += 64;
   276          nb -= 64;
   277      }
   278  
   279      /* 32-byte SIMD round */
   280      if (likely(nb >= 32))
   281      {
   282  #if USE_AVX2
   283          v0 = _mm256_loadu_si256((const void *)sp);
   284          q0 = _mm256_cmpeq_epi8(v0, cq);
   285          x0 = _mm256_cmpeq_epi8(v0, cx);
   286          s0 = _mm256_movemask_epi8(q0);
   287          t0 = _mm256_movemask_epi8(x0);
   288          m0 = (uint64_t)s0;
   289          m1 = (uint64_t)t0;
   290  #else
   291          v0 = _mm_loadu_si128((const void *)(sp + 0));
   292          v1 = _mm_loadu_si128((const void *)(sp + 16));
   293          q0 = _mm_cmpeq_epi8(v0, cq);
   294          q1 = _mm_cmpeq_epi8(v1, cq);
   295          x0 = _mm_cmpeq_epi8(v0, cx);
   296          x1 = _mm_cmpeq_epi8(v1, cx);
   297          s0 = _mm_movemask_epi8(q0);
   298          s1 = _mm_movemask_epi8(q1);
   299          t0 = _mm_movemask_epi8(x0);
   300          t1 = _mm_movemask_epi8(x1);
   301          m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
   302          m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
   303  #endif
   304  
   305          /** update first quote position */
   306          if (unlikely(m1 != 0))
   307          {
   308              ep_setx(sp - ss + __builtin_ctzll(m1))
   309          }
   310  
   311          /** mask all the escaped quotes */
   312          if (unlikely(m1 != 0 || cr != 0))
   313          {
   314              m0_mask(add32)
   315          }
   316  
   317          /* check for end quote */
   318          if (m0 != 0)
   319          {
   320              return sp - ss + __builtin_ctzll(m0) + 1;
   321          }
   322  
   323          /* move to the next block */
   324          sp += 32;
   325          nb -= 32;
   326      }
   327  
   328      /* check for carry */
   329      if (unlikely(cr != 0))
   330      {
   331          if (nb == 0)
   332          {
   333              return -ERR_EOF;
   334          }
   335          else
   336          {
   337              ep_setc()
   338                  sp++,
   339                  nb--;
   340          }
   341      }
   342  
   343      /* handle the remaining bytes with scalar code */
   344      while (nb-- > 0 && (ch = *sp++) != '"')
   345      {
   346          if (unlikely(ch == '\\'))
   347          {
   348              if (nb == 0)
   349              {
   350                  return -ERR_EOF;
   351              }
   352              else
   353              {
   354                  ep_setc()
   355                      sp++,
   356                      nb--;
   357              }
   358          }
   359      }
   360  
   361  #undef ep_init
   362  #undef ep_setc
   363  #undef ep_setx
   364  #undef m0_mask
   365  
   366      /* check for quotes */
   367      if (ch == '"')
   368      {
   369          return sp - ss;
   370      }
   371      else
   372      {
   373          return -ERR_EOF;
   374      }
   375  }
   376  
   377  static inline int _mm_get_mask(__m128i v, __m128i t)
   378  {
   379      return _mm_movemask_epi8(_mm_cmpeq_epi8(v, t));
   380  }
   381  
   382  // contrl char: 0x00 ~ 0x1F
   383  static inline int _mm_cchars_mask(__m128i v)
   384  {
   385      __m128i e1 = _mm_cmpgt_epi8(v, _mm_set1_epi8(-1));
   386      __m128i e2 = _mm_cmpgt_epi8(v, _mm_set1_epi8(31));
   387      return _mm_movemask_epi8(_mm_andnot_si128(e2, e1));
   388  }
   389  
   390  #if USE_AVX2
   391  
   392  static inline int _mm256_get_mask(__m256i v, __m256i t)
   393  {
   394      return _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, t));
   395  }
   396  
   397  // contrl char: 0x00 ~ 0x1F
   398  static inline int _mm256_cchars_mask(__m256i v)
   399  {
   400      __m256i e1 = _mm256_cmpgt_epi8(v, _mm256_set1_epi8(-1));
   401      __m256i e2 = _mm256_cmpgt_epi8(v, _mm256_set1_epi8(31));
   402      return _mm256_movemask_epi8(_mm256_andnot_si256(e2, e1));
   403  }
   404  
   405  #endif
   406  
   407  static inline ssize_t advance_validate_string(const GoString *src, long p, int64_t *ep)
   408  {
   409      char ch;
   410      uint64_t es;
   411      uint64_t fe;
   412      uint64_t os;
   413      uint64_t m0;
   414      uint64_t m1;
   415      uint64_t m2;
   416      uint64_t cr = 0;
   417      long qp = 0;
   418      long np = 0;
   419  
   420      /* prevent out-of-bounds accessing */
   421      if (unlikely(src->len == p))
   422      {
   423          return -ERR_EOF;
   424      }
   425  
   426      /* buffer pointers */
   427      size_t nb = src->len;
   428      const char *sp = src->buf;
   429      const char *ss = src->buf;
   430  
   431  #define ep_init() *ep = -1;
   432  #define ep_setc() ep_setx(sp - ss - 1)
   433  #define ep_setx(x) \
   434      if (*ep == -1) \
   435      {              \
   436          *ep = (x); \
   437      }
   438  
   439      /* seek to `p` */
   440      nb -= p;
   441      sp += p;
   442      ep_init()
   443  
   444  #if USE_AVX2
   445          /* initialize vectors */
   446          __m256i v0;
   447      __m256i v1;
   448      __m256i cq = _mm256_set1_epi8('"');
   449      __m256i cx = _mm256_set1_epi8('\\');
   450  
   451      /* partial masks */
   452      uint32_t s0, s1;
   453      uint32_t t0, t1;
   454      uint32_t c0, c1;
   455  #else
   456          /* initialize vectors */
   457          __m128i v0;
   458      __m128i v1;
   459      __m128i v2;
   460      __m128i v3;
   461      __m128i cq = _mm_set1_epi8('"');
   462      __m128i cx = _mm_set1_epi8('\\');
   463  
   464      /* partial masks */
   465      uint32_t s0, s1, s2, s3;
   466      uint32_t t0, t1, t2, t3;
   467      uint32_t c0, c1, c2, c3;
   468  #endif
   469  
   470  #define m0_mask(add)            \
   471      m1 &= ~cr;                  \
   472      fe = (m1 << 1) | cr;        \
   473      os = (m1 & ~fe) & ODD_MASK; \
   474      es = add(os, m1, &cr) << 1; \
   475      m0 &= ~(fe & (es ^ EVEN_MASK));
   476  
   477      /* 64-byte SIMD loop */
   478      while (likely(nb >= 64))
   479      {
   480  #if USE_AVX2
   481          v0 = _mm256_loadu_si256((const void *)(sp + 0));
   482          v1 = _mm256_loadu_si256((const void *)(sp + 32));
   483          s0 = _mm256_get_mask(v0, cq);
   484          s1 = _mm256_get_mask(v1, cq);
   485          t0 = _mm256_get_mask(v0, cx);
   486          t1 = _mm256_get_mask(v1, cx);
   487          c0 = _mm256_cchars_mask(v0);
   488          c1 = _mm256_cchars_mask(v1);
   489          m0 = ((uint64_t)s1 << 32) | (uint64_t)s0;
   490          m1 = ((uint64_t)t1 << 32) | (uint64_t)t0;
   491          m2 = ((uint64_t)c1 << 32) | (uint64_t)c0;
   492  #else
   493          v0 = _mm_loadu_si128((const void *)(sp + 0));
   494          v1 = _mm_loadu_si128((const void *)(sp + 16));
   495          v2 = _mm_loadu_si128((const void *)(sp + 32));
   496          v3 = _mm_loadu_si128((const void *)(sp + 48));
   497          s0 = _mm_get_mask(v0, cq);
   498          s1 = _mm_get_mask(v1, cq);
   499          s2 = _mm_get_mask(v2, cq);
   500          s3 = _mm_get_mask(v3, cq);
   501          t0 = _mm_get_mask(v0, cx);
   502          t1 = _mm_get_mask(v1, cx);
   503          t2 = _mm_get_mask(v2, cx);
   504          t3 = _mm_get_mask(v3, cx);
   505          c0 = _mm_cchars_mask(v0);
   506          c1 = _mm_cchars_mask(v1);
   507          c2 = _mm_cchars_mask(v2);
   508          c3 = _mm_cchars_mask(v3);
   509          m0 = ((uint64_t)s3 << 48) | ((uint64_t)s2 << 32) | ((uint64_t)s1 << 16) | (uint64_t)s0;
   510          m1 = ((uint64_t)t3 << 48) | ((uint64_t)t2 << 32) | ((uint64_t)t1 << 16) | (uint64_t)t0;
   511          m2 = ((uint64_t)c3 << 48) | ((uint64_t)c2 << 32) | ((uint64_t)c1 << 16) | (uint64_t)c0;
   512  
   513  #endif
   514  
   515          /** update first quote position */
   516          if (unlikely(m1 != 0))
   517          {
   518              ep_setx(sp - ss + __builtin_ctzll(m1))
   519          }
   520  
   521          /** mask all the escaped quotes */
   522          if (unlikely(m1 != 0 || cr != 0))
   523          {
   524              m0_mask(add64)
   525          }
   526  
   527          /* get the position of end quote */
   528          if (m0 != 0)
   529          {
   530              qp = sp - ss + __builtin_ctzll(m0) + 1;
   531              /* check control chars in JSON string */
   532              if (unlikely(m2 != 0 && (np = sp - ss + __builtin_ctzll(m2)) < qp))
   533              {
   534                  ep_setx(np) // set error position
   535                      return -ERR_INVAL;
   536              }
   537              return qp;
   538          }
   539  
   540          /* check control chars in JSON string */
   541          if (unlikely(m2 != 0))
   542          {
   543              ep_setx(sp - ss + __builtin_ctzll(m2)) return -ERR_INVAL;
   544          }
   545  
   546          /* move to the next block */
   547          sp += 64;
   548          nb -= 64;
   549      }
   550  
   551      /* 32-byte SIMD round */
   552      if (likely(nb >= 32))
   553      {
   554  #if USE_AVX2
   555          v0 = _mm256_loadu_si256((const void *)sp);
   556          s0 = _mm256_get_mask(v0, cq);
   557          t0 = _mm256_get_mask(v0, cx);
   558          c0 = _mm256_cchars_mask(v0);
   559          m0 = (uint64_t)s0;
   560          m1 = (uint64_t)t0;
   561          m2 = (uint64_t)c0;
   562  #else
   563          v0 = _mm_loadu_si128((const void *)(sp + 0));
   564          v1 = _mm_loadu_si128((const void *)(sp + 16));
   565          s0 = _mm_get_mask(v0, cq);
   566          s1 = _mm_get_mask(v1, cq);
   567          t0 = _mm_get_mask(v0, cx);
   568          t1 = _mm_get_mask(v1, cx);
   569          c0 = _mm_cchars_mask(v0);
   570          c1 = _mm_cchars_mask(v1);
   571          m0 = ((uint64_t)s1 << 16) | (uint64_t)s0;
   572          m1 = ((uint64_t)t1 << 16) | (uint64_t)t0;
   573          m2 = ((uint64_t)c1 << 16) | (uint64_t)c0;
   574  #endif
   575  
   576          /** update first quote position */
   577          if (unlikely(m1 != 0))
   578          {
   579              ep_setx(sp - ss + __builtin_ctzll(m1))
   580          }
   581  
   582          /** mask all the escaped quotes */
   583          if (unlikely(m1 != 0 || cr != 0))
   584          {
   585              m0_mask(add32)
   586          }
   587  
   588          /* get the position of end quote */
   589          if (m0 != 0)
   590          {
   591              qp = sp - ss + __builtin_ctzll(m0) + 1;
   592              /* check control chars in JSON string */
   593              if (unlikely(m2 != 0 && (np = sp - ss + __builtin_ctzll(m2)) < qp))
   594              {
   595                  ep_setx(np) // set error position
   596                      return -ERR_INVAL;
   597              }
   598              return qp;
   599          }
   600  
   601          /* check control chars in JSON string */
   602          if (unlikely(m2 != 0))
   603          {
   604              ep_setx(sp - ss + __builtin_ctzll(m2)) return -ERR_INVAL;
   605          }
   606  
   607          /* move to the next block */
   608          sp += 32;
   609          nb -= 32;
   610      }
   611  
   612      /* check for carry */
   613      if (unlikely(cr != 0))
   614      {
   615          if (nb == 0)
   616          {
   617              return -ERR_EOF;
   618          }
   619          else
   620          {
   621              ep_setc()
   622                  sp++,
   623                  nb--;
   624          }
   625      }
   626  
   627      /* handle the remaining bytes with scalar code */
   628      while (nb-- > 0 && (ch = *sp++) != '"')
   629      {
   630          if (unlikely(ch == '\\'))
   631          {
   632              if (nb == 0)
   633              {
   634                  return -ERR_EOF;
   635              }
   636              else
   637              {
   638                  ep_setc()
   639                      sp++,
   640                      nb--;
   641              }
   642          }
   643          else if (unlikely(ch >= 0 && ch <= 0x1f))
   644          { // control chars
   645              ep_setc() return -ERR_INVAL;
   646          }
   647      }
   648  
   649  #undef ep_init
   650  #undef ep_setc
   651  #undef ep_setx
   652  #undef m0_mask
   653  
   654      /* check for quotes */
   655      if (ch == '"')
   656      {
   657          return sp - ss;
   658      }
   659      else
   660      {
   661          return -ERR_EOF;
   662      }
   663  }
   664  
   665  /** Value Scanning Routines **/
   666  
   667  long value(const char *s, size_t n, long p, JsonState *ret, int allow_control)
   668  {
   669      long q = p;
   670      GoString m = {.buf = s, .len = n};
   671  
   672      /* parse the next identifier, q is UNSAFE, may cause out-of-bounds accessing */
   673      switch (advance_ns(&m, &q))
   674      {
   675      case '-': /* fallthrough */
   676      case '0': /* fallthrough */
   677      case '1': /* fallthrough */
   678      case '2': /* fallthrough */
   679      case '3': /* fallthrough */
   680      case '4': /* fallthrough */
   681      case '5': /* fallthrough */
   682      case '6': /* fallthrough */
   683      case '7': /* fallthrough */
   684      case '8': /* fallthrough */
   685      case '9':
   686          vdigits(&m, &q, ret);
   687          return q;
   688      case '"':
   689          vstring(&m, &q, ret);
   690          return q;
   691      case 'n':
   692          ret->vt = advance_dword(&m, &q, 1, V_NULL, VS_NULL);
   693          return q;
   694      case 't':
   695          ret->vt = advance_dword(&m, &q, 1, V_TRUE, VS_TRUE);
   696          return q;
   697      case 'f':
   698          ret->vt = advance_dword(&m, &q, 0, V_FALSE, VS_ALSE);
   699          return q;
   700      case '[':
   701          ret->vt = V_ARRAY;
   702          return q;
   703      case '{':
   704          ret->vt = V_OBJECT;
   705          return q;
   706      case ':':
   707          ret->vt = allow_control ? V_KEY_SEP : -ERR_INVAL;
   708          return allow_control ? q : q - 1;
   709      case ',':
   710          ret->vt = allow_control ? V_ELEM_SEP : -ERR_INVAL;
   711          return allow_control ? q : q - 1;
   712      case ']':
   713          ret->vt = allow_control ? V_ARRAY_END : -ERR_INVAL;
   714          return allow_control ? q : q - 1;
   715      case '}':
   716          ret->vt = allow_control ? V_OBJECT_END : -ERR_INVAL;
   717          return allow_control ? q : q - 1;
   718      case 0:
   719          ret->vt = V_EOF;
   720          return q;
   721      default:
   722          ret->vt = -ERR_INVAL;
   723          return q - 1;
   724      }
   725  }
   726  
   727  void vstring(const GoString *src, long *p, JsonState *ret)
   728  {
   729      int64_t v = -1;
   730      int64_t i = *p;
   731      ssize_t e = advance_string(src, i, &v);
   732  
   733      /* check for errors */
   734      if (e < 0)
   735      {
   736          *p = src->len;
   737          ret->vt = e;
   738          return;
   739      }
   740  
   741      /* update the result, and fix the escape position (escaping past the end of string) */
   742      *p = e;
   743      ret->iv = i;
   744      ret->vt = V_STRING;
   745      ret->ep = v >= e ? -1 : v;
   746  }
   747  
   748  #define set_vt(t) \
   749      ret->vt = t;
   750  
   751  #define init_ret(t) \
   752      ret->vt = t;    \
   753      ret->dv = 0.0;  \
   754      ret->iv = 0;    \
   755      ret->ep = *p;
   756  
   757  #define check_eof()         \
   758      if (i >= n)             \
   759      {                       \
   760          *p = n;             \
   761          ret->vt = -ERR_EOF; \
   762          return;             \
   763      }
   764  
   765  #define check_sign(on_neg) \
   766      if (s[i] == '-')       \
   767      {                      \
   768          i++;               \
   769          on_neg;            \
   770          check_eof()        \
   771      }
   772  
   773  #define check_digit()             \
   774      if (s[i] < '0' || s[i] > '9') \
   775      {                             \
   776          *p = i;                   \
   777          ret->vt = -ERR_INVAL;     \
   778          return;                   \
   779      }
   780  
   781  #define check_leading_zero()                                                                \
   782      if (s[i] == '0' && (i >= n || (s[i + 1] != '.' && s[i + 1] != 'e' && s[i + 1] != 'E'))) \
   783      {                                                                                       \
   784          *p = ++i;                                                                           \
   785          return;                                                                             \
   786      }
   787  
   788  #define parse_sign(sgn)               \
   789      if (s[i] == '+' || s[i] == '-')   \
   790      {                                 \
   791          sgn = s[i++] == '+' ? 1 : -1; \
   792          check_eof()                   \
   793      }
   794  
   795  #define is_digit(val) \
   796      '0' <= val &&val <= '9'
   797  
   798  #define add_integer_to_mantissa(man, man_nd, exp10, dig) \
   799      if (man_nd < 19)                                     \
   800      {                                                    \
   801          man = man * 10 + dig;                            \
   802          man_nd++;                                        \
   803      }                                                    \
   804      else                                                 \
   805      {                                                    \
   806          exp10++;                                         \
   807      }
   808  
   809  #define add_float_to_mantissa(man, man_nd, exp10, dig) \
   810      man = man * 10 + dig;                              \
   811      man_nd++;                                          \
   812      exp10--;
   813  
   814  #define parse_float_digits(val, sgn, ...)                   \
   815      while (i < n && s[i] >= '0' && s[i] <= '9' __VA_ARGS__) \
   816      {                                                       \
   817          val *= 10;                                          \
   818          val += sgn * (s[i++] - '0');                        \
   819      }
   820  
   821  #define parse_integer_digits(val, sgn, ovf)                \
   822      while (i < n && s[i] >= '0' && s[i] <= '9')            \
   823      {                                                      \
   824          if (add_digit_overflow(val, sgn * (s[i++] - '0'))) \
   825          {                                                  \
   826              ovf = 1;                                       \
   827              break;                                         \
   828          }                                                  \
   829      }
   830  
   831  #define add_digit_overflow(val, chr) (       \
   832      __builtin_mul_overflow(val, 10, &val) || \
   833      __builtin_add_overflow(val, chr, &val))
   834  
   835  #define vinteger(type, sgn, on_neg)                 \
   836      int ovf = 0;                                    \
   837      type val = 0;                                   \
   838                                                      \
   839      /* initial buffer pointers */                   \
   840      long i = *p;                                    \
   841      size_t n = src->len;                            \
   842      const char *s = src->buf;                       \
   843                                                      \
   844      /* initialize the result, and check for '-' */  \
   845      init_ret(V_INTEGER)                             \
   846          check_eof()                                 \
   847              check_sign(on_neg)                      \
   848                                                      \
   849          /* check for leading zero or any digits */  \
   850          check_digit()                               \
   851              check_leading_zero()                    \
   852                  parse_integer_digits(val, sgn, ovf) \
   853                                                      \
   854          /* check for overflow */                    \
   855          if (ovf)                                    \
   856      {                                               \
   857          *p = i - 1;                                 \
   858          ret->vt = -ERR_OVERFLOW;                    \
   859          return;                                     \
   860      }                                               \
   861                                                      \
   862      /* check for the decimal part */                \
   863      if (i < n && s[i] == '.')                       \
   864      {                                               \
   865          *p = i;                                     \
   866          ret->vt = -ERR_NUMBER_FMT;                  \
   867          return;                                     \
   868      }                                               \
   869                                                      \
   870      /* check for the exponent part */               \
   871      if (i < n && (s[i] == 'e' || s[i] == 'E'))      \
   872      {                                               \
   873          *p = i;                                     \
   874          ret->vt = -ERR_NUMBER_FMT;                  \
   875          return;                                     \
   876      }                                               \
   877                                                      \
   878      /* update the result */                         \
   879      *p = i;                                         \
   880      ret->iv = val;
   881  
   882  /** check whether float can represent the val exactly **/
   883  static inline bool is_atof_exact(uint64_t man, int exp, int sgn, double *val)
   884  {
   885      *val = (double)man;
   886  
   887      if (man >> 52 != 0)
   888      {
   889          return false;
   890      }
   891  
   892      /* equal to if (sgn == -1) { *val *= -1; } */
   893      *(uint64_t *)val |= ((uint64_t)(sgn) >> 63 << 63);
   894  
   895      if (exp == 0 || man == 0)
   896      {
   897          return true;
   898      }
   899      else if (exp > 0 && exp <= 15 + 22)
   900      {
   901          /* uint64 integers: accurate range <= 10^15          *
   902           * Powers of 10: accurate range <= 10^22, as P10_TAB *
   903           * Example: man 1, exp 36, is ok                     */
   904          if (exp > 22)
   905          {
   906              *val *= P10_TAB[exp - 22];
   907              exp = 22;
   908          }
   909  
   910          /* f is not accurate when too larger */
   911          if (*val > 1e15 || *val < -1e15)
   912          {
   913              return false;
   914          }
   915  
   916          *val *= P10_TAB[exp];
   917          return true;
   918      }
   919      else if (exp < 0 && exp >= -22)
   920      {
   921          *val /= P10_TAB[-exp];
   922          return true;
   923      }
   924  
   925      return false;
   926  }
   927  
   928  static inline double atof_fast(uint64_t man, int exp, int sgn, int trunc, double *val)
   929  {
   930      double val_up = 0.0;
   931  
   932      /* look-up for fast atof if the conversion can be exactly */
   933      if (is_atof_exact(man, exp, sgn, val))
   934      {
   935          return true;
   936      }
   937  
   938      /* A fast atof algorithm for high percison */
   939      if (atof_eisel_lemire64(man, exp, sgn, val))
   940      {
   941          if (!trunc || (atof_eisel_lemire64(man + 1, exp, sgn, &val_up) && val_up == *val))
   942          {
   943              return true;
   944          }
   945      }
   946  
   947      return false;
   948  }
   949  
   950  static bool inline is_overflow(uint64_t man, int sgn, int exp10)
   951  {
   952      /* the former exp10 != 0 means man has overflowed
   953       * the later euqals to man*sgn < INT64_MIN or > INT64_MAX */
   954      return exp10 != 0 ||
   955             ((man >> 63) == 1 && ((uint64_t)sgn & man) != (1ull << 63));
   956  }
   957  
   958  void vnumber(const GoString *src, long *p, JsonState *ret)
   959  {
   960      int sgn = 1;
   961      uint64_t man = 0; // mantissa for double (float64)
   962      int man_nd = 0;   // # digits of mantissa, 10 ^ 19 fits uint64_t
   963      int exp10 = 0;    // val = sgn * man * 10 ^ exp10
   964      int trunc = 0;
   965      double val = 0;
   966  
   967      /* initial buffer pointers */
   968      long i = *p;
   969      size_t n = src->len;
   970      const char *s = src->buf;
   971      char *dbuf = ret->dbuf;
   972      ssize_t dcap = ret->dcap;
   973  
   974      /* initialize the result, and check for EOF */
   975      init_ret(V_INTEGER)
   976          check_eof()
   977              check_sign(sgn = -1)
   978  
   979          /* check for leading zero */
   980          check_digit()
   981              check_leading_zero()
   982  
   983          /* parse the integer part */
   984          while (i < n && is_digit(s[i]))
   985      {
   986          add_integer_to_mantissa(man, man_nd, exp10, (s[i] - '0'))
   987              i++;
   988      }
   989  
   990      if (exp10 > 0)
   991      {
   992          trunc = 1;
   993      }
   994  
   995      /* check for decimal points */
   996      if (i < n && s[i] == '.')
   997      {
   998          i++;
   999          set_vt(V_DOUBLE)
  1000              check_eof()
  1001                  check_digit()
  1002      }
  1003  
  1004      /* skip the leading zeros of 0.000xxxx */
  1005      if (man == 0 && exp10 == 0)
  1006      {
  1007          while (i < n && s[i] == '0')
  1008          {
  1009              i++;
  1010              exp10--;
  1011          }
  1012          man = 0;
  1013          man_nd = 0;
  1014      }
  1015  
  1016      /* the fractional part (uint64_t mantissa can represent at most 19 digits) */
  1017      while (i < n && man_nd < 19 && is_digit(s[i]))
  1018      {
  1019          add_float_to_mantissa(man, man_nd, exp10, (s[i] - '0'))
  1020              i++;
  1021      }
  1022  
  1023      /* skip the remaining digits */
  1024      while (i < n && is_digit(s[i]))
  1025      {
  1026          trunc = 1;
  1027          i++;
  1028      }
  1029  
  1030      /* check for exponent */
  1031      if (i < n && (s[i] == 'e' || s[i] == 'E'))
  1032      {
  1033          int esm = 1;
  1034          int exp = 0;
  1035  
  1036          /* check for the '+' or '-' sign, and parse the power */
  1037          i++;
  1038          set_vt(V_DOUBLE)
  1039              check_eof()
  1040                  parse_sign(esm)
  1041                      check_digit() while (i < n && is_digit(s[i]))
  1042          {
  1043              if (exp < 10000)
  1044              {
  1045                  exp = exp * 10 + (s[i] - '0');
  1046              }
  1047              i++;
  1048          }
  1049          exp10 += exp * esm;
  1050          goto parse_float;
  1051      }
  1052  
  1053      if (ret->vt == V_INTEGER)
  1054      {
  1055          if (!is_overflow(man, sgn, exp10))
  1056          {
  1057              ret->iv = (int64_t)man * sgn;
  1058              /* following lines equal to ret->dv = (double)(man) * sgn */
  1059              ret->dv = (double)(man);
  1060              *(uint64_t *)&ret->dv |= ((uint64_t)(sgn) >> 63 << 63);
  1061              *p = i;
  1062              return;
  1063          }
  1064          set_vt(V_DOUBLE)
  1065      }
  1066  
  1067  parse_float:
  1068      /* when fast algorithms failed, use slow fallback.*/
  1069      if (!atof_fast(man, exp10, sgn, trunc, &val))
  1070      {
  1071          val = atof_native(s + *p, i - *p, dbuf, dcap);
  1072      }
  1073  
  1074      /* check parsed double val */
  1075      if (is_infinity(val))
  1076      {
  1077          ret->vt = -ERR_FLOAT_INF;
  1078      }
  1079  
  1080      /* update the result */
  1081      ret->dv = val;
  1082      *p = i;
  1083  }
  1084  
/*
 * Parse a signed integer from src at *p into ret.
 *
 * The whole body is produced by the vinteger macro defined above, which
 * reads and writes the parameters src, p and ret by name. It is instantiated
 * with int64_t as the value type, `sgn` as the sign expression and
 * `sgn = -1` as the sign action (presumably run when a leading '-' is seen;
 * confirm against the vinteger definition).
 */
void vsigned(const GoString *src, long *p, JsonState *ret)
{
    /* sign of the number; flipped by the macro's sign action */
    int64_t sgn = 1;
    vinteger(int64_t, sgn, sgn = -1)
}
  1090  
/*
 * Parse an unsigned integer from src at *p into ret.
 *
 * The whole body is produced by the vinteger macro defined above, which
 * reads and writes the parameters src, p and ret by name. It is instantiated
 * with uint64_t as the value type and a constant sign of 1. The third
 * argument is the sign action (presumably run when a leading '-' is seen;
 * confirm against the vinteger definition): it rewinds *p to i - 1, reports
 * -ERR_NUMBER_FMT in ret->vt and returns.
 */
void vunsigned(const GoString *src, long *p, JsonState *ret)
{
    vinteger(uint64_t, 1, {
        *p = i - 1;
        ret->vt = -ERR_NUMBER_FMT;
        return;
    })
}
  1099  
  1100  #undef init_ret
  1101  #undef check_eof
  1102  #undef check_digit
  1103  #undef check_leading_zero
  1104  #undef parse_sign
  1105  #undef is_digit
  1106  #undef add_integer_to_mantissa
  1107  #undef add_float_to_mantissa
  1108  #undef parse_float_digits
  1109  #undef parse_integer_digits
  1110  #undef add_digit_overflow
  1111  #undef vinteger
  1112  
  1113  /** Value Skipping FSM **/
  1114  
  1115  // static inline void FSM_INIT(StateMachine *self, int vt)
  1116  // {
  1117  //     self->sp = 1;
  1118  //     self->vt[0] = vt;
  1119  // }
  1120  
  1121  // static inline long fsm_push(StateMachine *self, int vt)
  1122  // {
  1123  //     if (self->sp >= MAX_RECURSE)
  1124  //     {
  1125  //         return -ERR_RECURSE_MAX;
  1126  //     }
  1127  //     else
  1128  //     {
  1129  //         self->vt[self->sp++] = vt;
  1130  //         return 0;
  1131  //     }
  1132  // }
  1133  
  1134  static inline long fsm_exec(StateMachine *self, const GoString *src, long *p, int validate_flag)
  1135  {
  1136      int vt;
  1137      char ch;
  1138      long vi = -1;
  1139  
  1140      /* run until no more nested values */
  1141      while (self->sp)
  1142      {
  1143          ch = advance_ns(src, p);
  1144          vt = self->vt[self->sp - 1];
  1145  
  1146          /* set the start address if any */
  1147          if (vi == -1)
  1148          {
  1149              vi = *p - 1;
  1150          }
  1151  
  1152          /* check for special types */
  1153          switch (vt)
  1154          {
  1155          default:
  1156          {
  1157              FSM_DROP(self);
  1158              break;
  1159          }
  1160  
  1161          /* arrays */
  1162          case FSM_ARR:
  1163          {
  1164              switch (ch)
  1165              {
  1166              case ']':
  1167                  FSM_DROP(self);
  1168                  continue;
  1169              case ',':
  1170                  FSM_PUSH(self, FSM_VAL);
  1171                  continue;
  1172              default:
  1173                  return -ERR_INVAL;
  1174              }
  1175          }
  1176  
  1177          /* objects */
  1178          case FSM_OBJ:
  1179          {
  1180              switch (ch)
  1181              {
  1182              case '}':
  1183                  FSM_DROP(self);
  1184                  continue;
  1185              case ',':
  1186                  FSM_PUSH(self, FSM_KEY);
  1187                  continue;
  1188              default:
  1189                  return -ERR_INVAL;
  1190              }
  1191          }
  1192  
  1193          /* object keys */
  1194          case FSM_KEY:
  1195          {
  1196              FSM_CHAR('"');
  1197              FSM_REPL(self, FSM_ELEM);
  1198              FSM_XERR(skip_string(src, p));
  1199              continue;
  1200          }
  1201  
  1202          /* object element */
  1203          case FSM_ELEM:
  1204          {
  1205              FSM_CHAR(':');
  1206              FSM_REPL(self, FSM_VAL);
  1207              continue;
  1208          }
  1209  
  1210          /* arrays, first element */
  1211          case FSM_ARR_0:
  1212          {
  1213              if (ch == ']')
  1214              {
  1215                  FSM_DROP(self);
  1216                  continue;
  1217              }
  1218              else
  1219              {
  1220                  FSM_REPL(self, FSM_ARR);
  1221                  break;
  1222              }
  1223          }
  1224  
  1225          /* objects, first pair */
  1226          case FSM_OBJ_0:
  1227          {
  1228              switch (ch)
  1229              {
  1230              default:
  1231              {
  1232                  return -ERR_INVAL;
  1233              }
  1234  
  1235              /* empty object */
  1236              case '}':
  1237              {
  1238                  FSM_DROP(self);
  1239                  continue;
  1240              }
  1241  
  1242              /* the quote of the first key */
  1243              case '"':
  1244              {
  1245                  FSM_REPL(self, FSM_OBJ);
  1246                  if (validate_flag == VALID_DEFAULT)
  1247                  {
  1248                      FSM_XERR(skip_string(src, p));
  1249                  }
  1250                  else if (validate_flag == VALID_FULL)
  1251                  {
  1252                      FSM_XERR(validate_string(src, p));
  1253                  }
  1254                  FSM_PUSH(self, FSM_ELEM);
  1255                  continue;
  1256              }
  1257              }
  1258          }
  1259          }
  1260  
  1261          /* simple values */
  1262          switch (ch)
  1263          {
  1264          case '0': /* fallthrough */
  1265          case '1': /* fallthrough */
  1266          case '2': /* fallthrough */
  1267          case '3': /* fallthrough */
  1268          case '4': /* fallthrough */
  1269          case '5': /* fallthrough */
  1270          case '6': /* fallthrough */
  1271          case '7': /* fallthrough */
  1272          case '8': /* fallthrough */
  1273          case '9':
  1274              FSM_XERR(skip_positive(src, p));
  1275              break;
  1276          case '-':
  1277              FSM_XERR(skip_negative(src, p));
  1278              break;
  1279          case 'n':
  1280              FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_NULL));
  1281              break;
  1282          case 't':
  1283              FSM_XERR(advance_dword(src, p, 1, *p - 1, VS_TRUE));
  1284              break;
  1285          case 'f':
  1286              FSM_XERR(advance_dword(src, p, 0, *p - 1, VS_ALSE));
  1287              break;
  1288          case '[':
  1289              FSM_PUSH(self, FSM_ARR_0);
  1290              break;
  1291          case '{':
  1292              FSM_PUSH(self, FSM_OBJ_0);
  1293              break;
  1294          case '"':
  1295          {
  1296              if (validate_flag == VALID_DEFAULT)
  1297              {
  1298                  FSM_XERR(skip_string(src, p));
  1299              }
  1300              else if (validate_flag == VALID_FULL)
  1301              {
  1302                  FSM_XERR(validate_string(src, p));
  1303              }
  1304              break;
  1305          }
  1306          case 0:
  1307              return -ERR_EOF;
  1308          default:
  1309              return -ERR_INVAL;
  1310          }
  1311      }
  1312  
  1313      /* all done */
  1314      return vi;
  1315  }
  1316  
  1317  long skip_number(const char *sp, size_t nb)
  1318  {
  1319      long di = -1;
  1320      long ei = -1;
  1321      long si = -1;
  1322      const char *ss = sp;
  1323  
  1324      /* check for EOF */
  1325      if (nb == 0)
  1326      {
  1327          return -1;
  1328      }
  1329  
  1330      /* special case of '0' */
  1331      if (*sp == '0' && (nb == 1 || (sp[1] != '.' && sp[1] != 'e' && sp[1] != 'E')))
  1332      {
  1333          return 1;
  1334      }
  1335  
  1336  #if USE_AVX2
  1337      /* can do with AVX-2 */
  1338      if (likely(nb >= 32))
  1339      {
  1340          __m256i d9 = _mm256_set1_epi8('9');
  1341          __m256i ds = _mm256_set1_epi8('/');
  1342          __m256i dp = _mm256_set1_epi8('.');
  1343          __m256i el = _mm256_set1_epi8('e');
  1344          __m256i eu = _mm256_set1_epi8('E');
  1345          __m256i xp = _mm256_set1_epi8('+');
  1346          __m256i xm = _mm256_set1_epi8('-');
  1347  
  1348          /* 32-byte loop */
  1349          do
  1350          {
  1351              __m256i sb = _mm256_loadu_si256((const void *)sp);
  1352              __m256i i0 = _mm256_cmpgt_epi8(sb, ds);
  1353              __m256i i9 = _mm256_cmpgt_epi8(sb, d9);
  1354              __m256i id = _mm256_cmpeq_epi8(sb, dp);
  1355              __m256i il = _mm256_cmpeq_epi8(sb, el);
  1356              __m256i iu = _mm256_cmpeq_epi8(sb, eu);
  1357              __m256i ip = _mm256_cmpeq_epi8(sb, xp);
  1358              __m256i im = _mm256_cmpeq_epi8(sb, xm);
  1359              __m256i iv = _mm256_andnot_si256(i9, i0);
  1360              __m256i ie = _mm256_or_si256(il, iu);
  1361              __m256i is = _mm256_or_si256(ip, im);
  1362              __m256i rt = _mm256_or_si256(iv, id);
  1363              __m256i ru = _mm256_or_si256(ie, is);
  1364              __m256i rv = _mm256_or_si256(rt, ru);
  1365  
  1366              /* exponent and sign position */
  1367              uint32_t md = _mm256_movemask_epi8(id);
  1368              uint32_t me = _mm256_movemask_epi8(ie);
  1369              uint32_t ms = _mm256_movemask_epi8(is);
  1370              uint32_t mr = _mm256_movemask_epi8(rv);
  1371  
  1372              /* mismatch position */
  1373              uint32_t v;
  1374              uint32_t i = __builtin_ctzll(~(uint64_t)mr | 0x0100000000);
  1375  
  1376              /* mask out excess characters */
  1377              if (i != 32)
  1378              {
  1379                  md &= (1 << i) - 1;
  1380                  me &= (1 << i) - 1;
  1381                  ms &= (1 << i) - 1;
  1382              }
  1383  
  1384              /* check & update decimal point, exponent and sign index */
  1385              check_bits(md)
  1386                  check_bits(me)
  1387                      check_bits(ms)
  1388                          check_vidx(di, md)
  1389                              check_vidx(ei, me)
  1390                                  check_vidx(si, ms)
  1391  
  1392                  /* check for valid number */
  1393                  if (i != 32)
  1394              {
  1395                  sp += i;
  1396                  _mm256_zeroupper();
  1397                  goto check_index;
  1398              }
  1399  
  1400              /* move to next block */
  1401              sp += 32;
  1402              nb -= 32;
  1403          } while (nb >= 32);
  1404  
  1405          /* clear the upper half to prevent AVX-SSE transition penalty */
  1406          _mm256_zeroupper();
  1407      }
  1408  #endif
  1409  
  1410      /* can do with SSE */
  1411      if (likely(nb >= 16))
  1412      {
  1413          __m128i dc = _mm_set1_epi8(':');
  1414          __m128i ds = _mm_set1_epi8('/');
  1415          __m128i dp = _mm_set1_epi8('.');
  1416          __m128i el = _mm_set1_epi8('e');
  1417          __m128i eu = _mm_set1_epi8('E');
  1418          __m128i xp = _mm_set1_epi8('+');
  1419          __m128i xm = _mm_set1_epi8('-');
  1420  
  1421          /* 16-byte loop */
  1422          do
  1423          {
  1424              __m128i sb = _mm_loadu_si128((const void *)sp);
  1425              __m128i i0 = _mm_cmpgt_epi8(sb, ds);
  1426              __m128i i9 = _mm_cmplt_epi8(sb, dc);
  1427              __m128i id = _mm_cmpeq_epi8(sb, dp);
  1428              __m128i il = _mm_cmpeq_epi8(sb, el);
  1429              __m128i iu = _mm_cmpeq_epi8(sb, eu);
  1430              __m128i ip = _mm_cmpeq_epi8(sb, xp);
  1431              __m128i im = _mm_cmpeq_epi8(sb, xm);
  1432              __m128i iv = _mm_and_si128(i9, i0);
  1433              __m128i ie = _mm_or_si128(il, iu);
  1434              __m128i is = _mm_or_si128(ip, im);
  1435              __m128i rt = _mm_or_si128(iv, id);
  1436              __m128i ru = _mm_or_si128(ie, is);
  1437              __m128i rv = _mm_or_si128(rt, ru);
  1438  
  1439              /* exponent and sign position */
  1440              uint32_t md = _mm_movemask_epi8(id);
  1441              uint32_t me = _mm_movemask_epi8(ie);
  1442              uint32_t ms = _mm_movemask_epi8(is);
  1443              uint32_t mr = _mm_movemask_epi8(rv);
  1444  
  1445              /* mismatch position */
  1446              uint32_t v;
  1447              uint32_t i = __builtin_ctzll(~mr | 0x00010000);
  1448  
  1449              /* mask out excess characters */
  1450              if (i != 16)
  1451              {
  1452                  md &= (1 << i) - 1;
  1453                  me &= (1 << i) - 1;
  1454                  ms &= (1 << i) - 1;
  1455              }
  1456  
  1457              /* check & update exponent and sign index */
  1458              check_bits(md)
  1459                  check_bits(me)
  1460                      check_bits(ms)
  1461                          check_vidx(di, md)
  1462                              check_vidx(ei, me)
  1463                                  check_vidx(si, ms)
  1464  
  1465                  /* check for valid number */
  1466                  if (i != 16)
  1467              {
  1468                  sp += i;
  1469                  goto check_index;
  1470              }
  1471  
  1472              /* move to next block */
  1473              sp += 16;
  1474              nb -= 16;
  1475          } while (nb >= 16);
  1476      }
  1477  
  1478      /* remaining bytes, do with scalar code */
  1479      while (likely(nb-- > 0))
  1480      {
  1481          switch (*sp++)
  1482          {
  1483          case '0': /* fallthrough */
  1484          case '1': /* fallthrough */
  1485          case '2': /* fallthrough */
  1486          case '3': /* fallthrough */
  1487          case '4': /* fallthrough */
  1488          case '5': /* fallthrough */
  1489          case '6': /* fallthrough */
  1490          case '7': /* fallthrough */
  1491          case '8': /* fallthrough */
  1492          case '9':
  1493              break;
  1494          case '.':
  1495              check_sidx(di);
  1496              break;
  1497          case 'e': /* fallthrough */
  1498          case 'E':
  1499              check_sidx(ei);
  1500              break;
  1501          case '+': /* fallthrough */
  1502          case '-':
  1503              check_sidx(si);
  1504              break;
  1505          default:
  1506              sp--;
  1507              goto check_index;
  1508          }
  1509      }
  1510  check_index:
  1511      if (di == 0 || si == 0 || ei == 0)
  1512      {
  1513          return -1;
  1514      }
  1515      else if (di == sp - ss - 1 || si == sp - ss - 1 || ei == sp - ss - 1)
  1516      {
  1517          return -(sp - ss);
  1518      }
  1519      else if (si > 0 && ei != si - 1)
  1520      {
  1521          return -si - 1;
  1522      }
  1523      else if (di >= 0 && ei >= 0 && di > ei - 1)
  1524      {
  1525          return -di - 1;
  1526      }
  1527      else if (di >= 0 && ei >= 0 && di == ei - 1)
  1528      {
  1529          return -ei - 1;
  1530      }
  1531      else
  1532      {
  1533          return sp - ss;
  1534      }
  1535  }
  1536  
  1537  long skip_one(const GoString *src, long *p, StateMachine *m)
  1538  {
  1539      FSM_INIT(m, FSM_VAL);
  1540      return fsm_exec(m, src, p, VALID_DEFAULT);
  1541  }
  1542  
  1543  long skip_array(const GoString *src, long *p, StateMachine *m)
  1544  {
  1545      FSM_INIT(m, FSM_ARR_0);
  1546      return fsm_exec(m, src, p, VALID_DEFAULT);
  1547  }
  1548  
  1549  long skip_object(const GoString *src, long *p, StateMachine *m)
  1550  {
  1551      FSM_INIT(m, FSM_OBJ_0);
  1552      return fsm_exec(m, src, p, VALID_DEFAULT);
  1553  }
  1554  
  1555  long skip_string(const GoString *src, long *p)
  1556  {
  1557      int64_t v;
  1558      ssize_t q = *p - 1;
  1559      ssize_t e = advance_string(src, *p, &v);
  1560  
  1561      /* check for errors, and update the position */
  1562      if (e >= 0)
  1563      {
  1564          *p = e;
  1565          return q;
  1566      }
  1567      else
  1568      {
  1569          *p = src->len;
  1570          return e;
  1571      }
  1572  }
  1573  
  1574  long validate_string(const GoString *src, long *p)
  1575  {
  1576      int64_t v;
  1577      ssize_t q = *p - 1;
  1578      ssize_t e = advance_validate_string(src, *p, &v);
  1579  
  1580      /* check for errors in string advance */
  1581      if (e < 0)
  1582      {
  1583          *p = e == -ERR_EOF ? src->len : v;
  1584          return e;
  1585      }
  1586  
  1587      /* check for errors in UTF-8 validate */
  1588      ssize_t nb = e - *p - 1;
  1589      ssize_t r = utf8_validate(src->buf + *p, nb);
  1590      if (r >= 0)
  1591      {
  1592          *p += r;
  1593          return -ERR_INVAL;
  1594      }
  1595      *p = e;
  1596      return q;
  1597  }
  1598  
  1599  long skip_negative(const GoString *src, long *p)
  1600  {
  1601      long i = *p;
  1602      long r = skip_number(src->buf + i, src->len - i);
  1603  
  1604      /* check for errors */
  1605      if (r < 0)
  1606      {
  1607          *p -= r + 1;
  1608          return -ERR_INVAL;
  1609      }
  1610  
  1611      /* update value pointer */
  1612      *p += r;
  1613      return i - 1;
  1614  }
  1615  
  1616  long skip_positive(const GoString *src, long *p)
  1617  {
  1618      long i = *p - 1;
  1619      long r = skip_number(src->buf + i, src->len - i);
  1620  
  1621      /* check for errors */
  1622      if (r < 0)
  1623      {
  1624          *p -= r + 2;
  1625          return -ERR_INVAL;
  1626      }
  1627  
  1628      /* update value pointer */
  1629      *p += r - 1;
  1630      return i;
  1631  }
  1632  
  1633  long validate_one(const GoString *src, long *p, StateMachine *m)
  1634  {
  1635      FSM_INIT(m, FSM_VAL);
  1636      return fsm_exec(m, src, p, VALID_FULL);
  1637  }