github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/parsing.h

/*
 * Copyright 2021 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "native.h"
#include "utils.h"
#include <stdint.h>

/** String Quoting **/
#define MAX_ESCAPED_BYTES 8
typedef struct {
    const long n;
    const char s[MAX_ESCAPED_BYTES];
} quoted_t;
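
/* Each quoted_t entry describes the escaped form of one input byte: `n` is the
 * length of the escaped text and `s` holds its bytes (zero-padded up to
 * MAX_ESCAPED_BYTES). An entry with n == 0 means the byte needs no escaping. */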

static const quoted_t _SingleQuoteTab[256] = {
    ['\x00'] = { .n = 6, .s = "\\u0000" },
    ['\x01'] = { .n = 6, .s = "\\u0001" },
    ['\x02'] = { .n = 6, .s = "\\u0002" },
    ['\x03'] = { .n = 6, .s = "\\u0003" },
    ['\x04'] = { .n = 6, .s = "\\u0004" },
    ['\x05'] = { .n = 6, .s = "\\u0005" },
    ['\x06'] = { .n = 6, .s = "\\u0006" },
    ['\x07'] = { .n = 6, .s = "\\u0007" },
    ['\b'  ] = { .n = 6, .s = "\\u0008" },
    ['\t'  ] = { .n = 2, .s = "\\t"     },
    ['\n'  ] = { .n = 2, .s = "\\n"     },
    ['\x0b'] = { .n = 6, .s = "\\u000b" },
    ['\f'  ] = { .n = 6, .s = "\\u000c" },
    ['\r'  ] = { .n = 2, .s = "\\r"     },
    ['\x0e'] = { .n = 6, .s = "\\u000e" },
    ['\x0f'] = { .n = 6, .s = "\\u000f" },
    ['\x10'] = { .n = 6, .s = "\\u0010" },
    ['\x11'] = { .n = 6, .s = "\\u0011" },
    ['\x12'] = { .n = 6, .s = "\\u0012" },
    ['\x13'] = { .n = 6, .s = "\\u0013" },
    ['\x14'] = { .n = 6, .s = "\\u0014" },
    ['\x15'] = { .n = 6, .s = "\\u0015" },
    ['\x16'] = { .n = 6, .s = "\\u0016" },
    ['\x17'] = { .n = 6, .s = "\\u0017" },
    ['\x18'] = { .n = 6, .s = "\\u0018" },
    ['\x19'] = { .n = 6, .s = "\\u0019" },
    ['\x1a'] = { .n = 6, .s = "\\u001a" },
    ['\x1b'] = { .n = 6, .s = "\\u001b" },
    ['\x1c'] = { .n = 6, .s = "\\u001c" },
    ['\x1d'] = { .n = 6, .s = "\\u001d" },
    ['\x1e'] = { .n = 6, .s = "\\u001e" },
    ['\x1f'] = { .n = 6, .s = "\\u001f" },
    ['"'   ] = { .n = 2, .s = "\\\""    },
    ['\\'  ] = { .n = 2, .s = "\\\\"    },
};
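
/* _SingleQuoteTab maps each byte to its JSON string escape: control characters
 * (0x00-0x1F) become \u00XX, except \t, \n and \r which use their short forms,
 * and '"' and '\\' are backslash-escaped. All other bytes have n == 0 and are
 * copied through verbatim by the quoting routines below. */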

static const quoted_t _DoubleQuoteTab[256] = {
    ['\x00'] = { .n = 7, .s = "\\\\u0000" },
    ['\x01'] = { .n = 7, .s = "\\\\u0001" },
    ['\x02'] = { .n = 7, .s = "\\\\u0002" },
    ['\x03'] = { .n = 7, .s = "\\\\u0003" },
    ['\x04'] = { .n = 7, .s = "\\\\u0004" },
    ['\x05'] = { .n = 7, .s = "\\\\u0005" },
    ['\x06'] = { .n = 7, .s = "\\\\u0006" },
    ['\x07'] = { .n = 7, .s = "\\\\u0007" },
    ['\b'  ] = { .n = 7, .s = "\\\\u0008" },
    ['\t'  ] = { .n = 3, .s = "\\\\t"     },
    ['\n'  ] = { .n = 3, .s = "\\\\n"     },
    ['\x0b'] = { .n = 7, .s = "\\\\u000b" },
    ['\f'  ] = { .n = 7, .s = "\\\\u000c" },
    ['\r'  ] = { .n = 3, .s = "\\\\r"     },
    ['\x0e'] = { .n = 7, .s = "\\\\u000e" },
    ['\x0f'] = { .n = 7, .s = "\\\\u000f" },
    ['\x10'] = { .n = 7, .s = "\\\\u0010" },
    ['\x11'] = { .n = 7, .s = "\\\\u0011" },
    ['\x12'] = { .n = 7, .s = "\\\\u0012" },
    ['\x13'] = { .n = 7, .s = "\\\\u0013" },
    ['\x14'] = { .n = 7, .s = "\\\\u0014" },
    ['\x15'] = { .n = 7, .s = "\\\\u0015" },
    ['\x16'] = { .n = 7, .s = "\\\\u0016" },
    ['\x17'] = { .n = 7, .s = "\\\\u0017" },
    ['\x18'] = { .n = 7, .s = "\\\\u0018" },
    ['\x19'] = { .n = 7, .s = "\\\\u0019" },
    ['\x1a'] = { .n = 7, .s = "\\\\u001a" },
    ['\x1b'] = { .n = 7, .s = "\\\\u001b" },
    ['\x1c'] = { .n = 7, .s = "\\\\u001c" },
    ['\x1d'] = { .n = 7, .s = "\\\\u001d" },
    ['\x1e'] = { .n = 7, .s = "\\\\u001e" },
    ['\x1f'] = { .n = 7, .s = "\\\\u001f" },
    ['"'   ] = { .n = 4, .s = "\\\\\\\""  },
    ['\\'  ] = { .n = 4, .s = "\\\\\\\\"  },
};
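
/* _DoubleQuoteTab is the same mapping with every backslash escaped once more
 * (e.g. the literal text \\t instead of \t), presumably for output that will
 * itself be embedded inside an already-quoted JSON string, so the escapes
 * survive a second round of quoting. */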

static const quoted_t _HtmlQuoteTab[256] = {
    ['<'] = { .n = 6, .s = "\\u003c" },
    ['>'] = { .n = 6, .s = "\\u003e" },
    ['&'] = { .n = 6, .s = "\\u0026" },
    // \u2028 and \u2029 are encoded as [E2 80 A8] and [E2 80 A9] in UTF-8
    [0xe2] = { .n = 0, .s = {0} },
    [0xa8] = { .n = 6, .s = "\\u2028" },
    [0xa9] = { .n = 6, .s = "\\u2029" },
};
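
/* _HtmlQuoteTab covers the characters escaped in HTML-safe mode: '<', '>' and
 * '&', plus the line separators U+2028/U+2029. The 0xe2 entry is deliberately
 * empty (n == 0): 0xE2 is only the UTF-8 lead byte of those code points, so the
 * scanner below stops on it and the caller presumably checks the following two
 * bytes (0x80 0xA8 or 0x80 0xA9) before using the 0xa8/0xa9 replacements. */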

static always_inline __m128i _mm_find_quote(__m128i vv) {
    __m128i e1 = _mm_cmpgt_epi8   (vv, _mm_set1_epi8(-1));
    __m128i e2 = _mm_cmpgt_epi8   (vv, _mm_set1_epi8(31));
    __m128i e3 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('"'));
    __m128i e4 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('\\'));
    __m128i r1 = _mm_andnot_si128 (e2, e1);
    __m128i r2 = _mm_or_si128     (e3, e4);
    __m128i rv = _mm_or_si128     (r1, r2);
    return rv;
}
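
/* _mm_find_quote marks the bytes that _SingleQuoteTab would escape: e1 & ~e2
 * selects bytes in [0x00, 0x1F] (the signed compares leave bytes >= 0x80
 * unmarked), and e3/e4 add '"' and '\\'. The result holds 0xFF in every lane
 * that needs escaping, ready for _mm_movemask_epi8. */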

#if USE_AVX2
static always_inline __m256i _mm256_find_quote(__m256i vv) {
    __m256i e1 = _mm256_cmpgt_epi8   (vv, _mm256_set1_epi8(-1));
    __m256i e2 = _mm256_cmpgt_epi8   (vv, _mm256_set1_epi8(31));
    __m256i e3 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('"'));
    __m256i e4 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('\\'));
    __m256i r1 = _mm256_andnot_si256 (e2, e1);
    __m256i r2 = _mm256_or_si256     (e3, e4);
    __m256i rv = _mm256_or_si256     (r1, r2);
    return rv;
}
#endif

static always_inline ssize_t memcchr_quote(const char *sp, ssize_t nb, char *dp, ssize_t dn) {
    uint32_t     mm;
    const char * ss = sp;

#if USE_AVX2
    /* 32-byte loop, full store */
    while (nb >= 32 && dn >= 32) {
        __m256i vv = _mm256_loadu_si256  ((const void *)sp);
        __m256i rv = _mm256_find_quote   (vv);
                     _mm256_storeu_si256 ((void *)dp, vv);

        /* check for matches */
        if ((mm = _mm256_movemask_epi8(rv)) != 0) {
            return sp - ss + __builtin_ctz(mm);
        }

        /* move to next block */
        sp += 32;
        dp += 32;
        nb -= 32;
        dn -= 32;
    }

    /* 32-byte test, partial store */
    if (nb >= 32) {
        __m256i  vv = _mm256_loadu_si256   ((const void *)sp);
        __m256i  rv = _mm256_find_quote    (vv);
        uint32_t mv = _mm256_movemask_epi8 (rv);
        uint32_t fv = __builtin_ctzll      ((uint64_t)mv | 0x0100000000);

        /* copy at most `dn` characters */
        if (fv <= dn) {
            memcpy_p32(dp, sp, fv);
            return sp - ss + fv;
        } else {
            memcpy_p32(dp, sp, dn);
            return -(sp - ss + dn) - 1;
        }
    }

    /* clear upper half to avoid AVX-SSE transition penalty */
    _mm256_zeroupper();
#endif

    /* 16-byte loop, full store */
    while (nb >= 16 && dn >= 16) {
        __m128i vv = _mm_loadu_si128  ((const void *)sp);
        __m128i rv = _mm_find_quote   (vv);
                     _mm_storeu_si128 ((void *)dp, vv);

        /* check for matches */
        if ((mm = _mm_movemask_epi8(rv)) != 0) {
            return sp - ss + __builtin_ctz(mm);
        }

        /* move to next block */
        sp += 16;
        dp += 16;
        nb -= 16;
        dn -= 16;
    }

    /* 16-byte test, partial store */
    if (nb >= 16) {
        __m128i  vv = _mm_loadu_si128   ((const void *)sp);
        __m128i  rv = _mm_find_quote    (vv);
        uint32_t mv = _mm_movemask_epi8 (rv);
        uint32_t fv = __builtin_ctz     (mv | 0x010000);

        /* copy at most `dn` characters */
        if (fv <= dn) {
            memcpy_p16(dp, sp, fv);
            return sp - ss + fv;
        } else {
            memcpy_p16(dp, sp, dn);
            return -(sp - ss + dn) - 1;
        }
    }

    /* handle the remaining bytes with scalar code */
    while (nb > 0 && dn > 0) {
        if (_SingleQuoteTab[*(uint8_t *)sp].n) {
            return sp - ss;
        } else {
            dn--, nb--;
            *dp++ = *sp++;
        }
    }

    /* check for dest buffer */
    if (nb == 0) {
        return sp - ss;
    } else {
        return -(sp - ss) - 1;
    }
}
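
/* memcchr_quote copies bytes from `sp` to `dp` until it meets a byte that
 * needs escaping, the input is exhausted, or the output is full. A return
 * value r >= 0 is the number of bytes consumed (and copied); r == nb means
 * nothing needs escaping, otherwise sp[r] is the byte to escape. A negative
 * return encodes the bytes consumed as -(ret) - 1 when the destination filled
 * up first. The SIMD loops may store a whole 16/32-byte block into `dp` before
 * reporting a match inside it; that stays in bounds because `dn` was checked
 * first, and the caller overwrites those bytes when it resumes. A caller
 * sketch (hypothetical buffers src/dst/n/cap, illustration only):
 *
 *     ssize_t r = memcchr_quote(src, n, dst, cap);
 *     if (r >= 0 && r < n) {
 *         // src[r] needs escaping: append _SingleQuoteTab[(uint8_t)src[r]]
 *     } else if (r < 0) {
 *         // destination full after -r - 1 bytes: grow dst and resume there
 *     }
 */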

static const bool _EscTab[256] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F
    //   '"'
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4F
    //                                  '\\'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50-0x5F
    // 0x60-0xFF are zeroes
};

static always_inline uint8_t escape_mask4(const char *sp) {
    return _EscTab[*(uint8_t *)(sp)]
         | (_EscTab[*(uint8_t *)(sp + 1)] << 1)
         | (_EscTab[*(uint8_t *)(sp + 2)] << 2)
         | (_EscTab[*(uint8_t *)(sp + 3)] << 3);
}
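
/* escape_mask4 folds the _EscTab lookups of sp[0..3] into a 4-bit mask
 * (bit i set when sp[i] needs escaping), so the scalar tail below can test
 * and locate an escape with a single __builtin_ctz. */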

static always_inline ssize_t memcchr_quote_unsafe(const char *sp, ssize_t nb, char *dp, const quoted_t * tab) {
    uint32_t     mm;
    const char * ds = dp;
    size_t cn = 0;

simd_copy:

    if (nb < 16) goto scalar_copy;

#if USE_AVX2
    /* 32-byte loop, full store */
    while (nb >= 32) {
        __m256i vv = _mm256_loadu_si256  ((const void *)sp);
        __m256i rv = _mm256_find_quote   (vv);
                     _mm256_storeu_si256 ((void *)dp, vv);

        /* check for matches */
        if ((mm = _mm256_movemask_epi8(rv)) != 0) {
            cn = __builtin_ctz(mm);
            sp += cn;
            nb -= cn;
            dp += cn;
            goto escape;
        }

        /* move to next block */
        sp += 32;
        dp += 32;
        nb -= 32;
    }

    /* clear upper half to avoid AVX-SSE transition penalty */
    _mm256_zeroupper();
#endif

    /* 16-byte loop, full store */
    while (nb >= 16) {
        __m128i vv = _mm_loadu_si128  ((const void *)sp);
        __m128i rv = _mm_find_quote   (vv);
                     _mm_storeu_si128 ((void *)dp, vv);

        /* check for matches */
        if ((mm = _mm_movemask_epi8(rv)) != 0) {
            cn =  __builtin_ctz(mm);
            sp += cn;
            nb -= cn;
            dp += cn;
            goto escape;
        }

        /* move to next block */
        sp += 16;
        dp += 16;
        nb -= 16;
    }

    /* handle the remaining bytes with scalar code */
    // while (nb > 0) {
    //     if (_EscTab[*(uint8_t *)sp]) {
    //         goto escape;
    //     } else {
    //         nb--;
    //         *dp++ = *sp++;
    //     }
    // }
    // optimize: loop unrolling here

scalar_copy:
    if (nb >= 8) {
        uint8_t mask1 = escape_mask4(sp);
        *(uint64_t *)dp = *(const uint64_t *)sp;
        if (unlikely(mask1)) {
            cn =  __builtin_ctz(mask1);
            sp += cn;
            nb -= cn;
            dp += cn;
            goto escape;
        }
        uint8_t mask2 = escape_mask4(sp + 4);
        if (unlikely(mask2)) {
            cn =  __builtin_ctz(mask2);
            sp += cn + 4;
            nb -= cn + 4;
            dp += cn + 4;
            goto escape;
        }
        dp += 8, sp += 8, nb -= 8;
    }

    if (nb >= 4) {
        uint8_t mask2 = escape_mask4(sp);
        *(uint32_t *)dp = *(const uint32_t *)sp;
        if (unlikely(mask2)) {
            cn =  __builtin_ctz(mask2);
            sp += cn;
            nb -= cn;
            dp += cn;
            goto escape;
        }
        dp += 4, sp += 4, nb -= 4;
    }

    while (nb > 0) {
        if (unlikely(_EscTab[*(uint8_t *)(sp)])) goto escape;
        *dp++ = *sp++, nb--;
    }
    /* all quote done */
    return dp - ds;
escape:
    /* get the escape entry, handle consecutive quotes */
    do {
        uint8_t ch = *(uint8_t *)sp;
        int nc = tab[ch].n;
        /* copy the quoted value.
         * Note: dp always has at least MAX_ESCAPED_BYTES (8) bytes of room here,
         * so a full 8-byte store is safe and memcpy_p8(dp, tab[ch].s, nc) is not needed.
         */
        *(uint64_t *)dp = *(const uint64_t *)tab[ch].s;
        sp++;
        nb--;
        dp += nc;
        if (nb <= 0) break;
        /* copy and find escape chars */
        if (_EscTab[*(uint8_t *)(sp)] == 0) {
            goto simd_copy;
        }
    } while (true);
    return dp - ds;
}
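
/* memcchr_quote_unsafe escapes the whole input through `tab` and returns the
 * number of bytes written. "Unsafe" refers to the destination: there is no
 * `dn` bound, so the caller appears to be responsible for reserving room for
 * the fully escaped output (worst case tab[i].n bytes per input byte) plus at
 * least MAX_ESCAPED_BYTES of slack for the unconditional 8-byte escape stores. */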

/** String Unquoting **/

static const char _UnquoteTab[256] = {
    ['/' ] = '/',
    ['"' ] = '"',
    ['b' ] = '\b',
    ['f' ] = '\f',
    ['n' ] = '\n',
    ['r' ] = '\r',
    ['t' ] = '\t',
    ['u' ] = -1,
    ['\\'] = '\\',
};
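
/* _UnquoteTab maps the character after a backslash to its decoded byte.
 * A zero entry marks an invalid escape, and 'u' maps to -1 as a sentinel
 * telling unescape() below to parse a \uXXXX sequence instead. */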

static always_inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
    int64_t      r;
    ssize_t      n = nb;
    const char * q = s;

#if USE_AVX2
    __m256i u;
    __m256i v;
    __m256i b = _mm256_set1_epi8('\\');

    /* process every 32 bytes */
    while (n >= 32) {
        u = _mm256_loadu_si256  ((const void *)s);
        v = _mm256_cmpeq_epi8   (u, b);
            _mm256_storeu_si256 ((void *)p, u);

        /* check for matches */
        if ((r = _mm256_movemask_epi8(v)) != 0) {
            return s - q + __builtin_ctzll(r);
        }

        /* move to the next 32 bytes */
        s += 32;
        p += 32;
        n -= 32;
    }

    /* clear upper half to avoid AVX-SSE transition penalty */
    _mm256_zeroupper();
#endif

    /* initialize with '\\' */
    __m128i x;
    __m128i y;
    __m128i a = _mm_set1_epi8('\\');

    /* process every 16 bytes */
    while (n >= 16) {
        x = _mm_loadu_si128  ((const void *)s);
        y = _mm_cmpeq_epi8   (x, a);
            _mm_storeu_si128 ((void *)p, x);

        /* check for matches */
        if ((r = _mm_movemask_epi8(y)) != 0) {
            return s - q + __builtin_ctzll(r);
        }

        /* move to the next 16 bytes */
        s += 16;
        p += 16;
        n -= 16;
    }

    /* remaining bytes, do with scalar code */
    while (n--) {
        if (*s != '\\') {
            *p++ = *s++;
        } else {
            return s - q;
        }
    }

    /* nothing found, but everything was copied */
    return -1;
}
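
/* memcchr_p32 copies up to `nb` bytes from `s` to `p`, stopping at the first
 * backslash. It returns the offset of that backslash, or -1 when the whole
 * input was copied without finding one. */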

#define ALL_01h     (~0ul / 255)
#define ALL_7fh     (ALL_01h * 127)
#define ALL_80h     (ALL_01h * 128)

static always_inline uint32_t hasless(uint32_t x, uint8_t n) {
    return (x - ALL_01h * n) & ~x & ALL_80h;
}

static always_inline uint32_t hasmore(uint32_t x, uint8_t n) {
    return (x + ALL_01h * (127 - n) | x) & ALL_80h;
}

static always_inline uint32_t hasbetween(uint32_t x, uint8_t m, uint8_t n) {
    return (ALL_01h * (127 + n) - (x & ALL_7fh) & ~x & (x & ALL_7fh) + ALL_01h * (127 - m)) & ALL_80h;
}
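
/* hasless/hasmore/hasbetween are the classic SWAR byte-range predicates (as in
 * the "Bit Twiddling Hacks" collection): they treat the uint32_t as four packed
 * bytes and return a word whose 0x80 bit is set in every byte position where
 * the condition holds. For example, hasless(0x30613030, '1') flags the three
 * 0x30 ('0') bytes but not the 0x61 ('a') byte. */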

#undef ALL_01h
#undef ALL_7fh
#undef ALL_80h

static always_inline char ishex(char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

static always_inline void unirep(char **dp) {
    *(*dp)++ = 0xef;
    *(*dp)++ = 0xbf;
    *(*dp)++ = 0xbd;
}
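
/* unirep writes the UTF-8 encoding of U+FFFD (the replacement character),
 * presumably used by callers that substitute invalid or lone surrogates. */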

static always_inline char unhex16_is(const char *s) {
    uint32_t v = *(uint32_t *)s;
    return !(hasless(v, '0') || hasmore(v, 'f') || hasbetween(v, '9', 'A') || hasbetween(v, 'F', 'a'));
}

static always_inline uint32_t unhex16_fast(const char *s) {
    uint32_t a = __builtin_bswap32(*(uint32_t *)s);
    uint32_t b = 9 * ((~a & 0x10101010) >> 4) + (a & 0x0f0f0f0f);
    uint32_t c = (b >> 4) | b;
    uint32_t d = ((c >> 8) & 0xff00) | (c & 0x00ff);
    return d;
}
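
/* unhex16_fast decodes four ASCII hex digits into a 16-bit value without
 * branching: after the bswap the first digit sits in the top byte, `b` maps
 * every byte to its nibble value (letters get +9 via the 0x10 bit test), and
 * the last two steps pack the four nibbles together. For example, "3afB"
 * decodes to 0x3afb. Callers validate the digits with unhex16_is first, since
 * this routine assumes well-formed input. */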

static always_inline __m128i _mm_find_html(__m128i vv) {
    __m128i e1 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('<'));
    __m128i e2 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('>'));
    __m128i e3 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('&'));
    __m128i e4 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('\xe2'));
    __m128i r1 = _mm_or_si128     (e1, e2);
    __m128i r2 = _mm_or_si128     (e3, e4);
    __m128i rv = _mm_or_si128     (r1, r2);
    return rv;
}

#if USE_AVX2
static always_inline __m256i _mm256_find_html(__m256i vv) {
    __m256i e1 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('<'));
    __m256i e2 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('>'));
    __m256i e3 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('&'));
    __m256i e4 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('\xe2'));
    __m256i r1 = _mm256_or_si256     (e1, e2);
    __m256i r2 = _mm256_or_si256     (e3, e4);
    __m256i rv = _mm256_or_si256     (r1, r2);
    return rv;
}
#endif

static always_inline ssize_t memcchr_html_quote(const char *sp, ssize_t nb, char *dp, ssize_t dn) {
    uint32_t     mm;
    const char * ss = sp;

#if USE_AVX2
    /* 32-byte loop, full store */
    while (nb >= 32 && dn >= 32) {
        __m256i vv = _mm256_loadu_si256  ((const void *)sp);
        __m256i rv = _mm256_find_html    (vv);
                     _mm256_storeu_si256 ((void *)dp, vv);

        /* check for matches */
        if ((mm = _mm256_movemask_epi8(rv)) != 0) {
            return sp - ss + __builtin_ctz(mm);
        }

        /* move to next block */
        sp += 32;
        dp += 32;
        nb -= 32;
        dn -= 32;
    }

    /* 32-byte test, partial store */
    if (nb >= 32) {
        __m256i  vv = _mm256_loadu_si256   ((const void *)sp);
        __m256i  rv = _mm256_find_html     (vv);
        uint32_t mv = _mm256_movemask_epi8 (rv);
        uint32_t fv = __builtin_ctzll      ((uint64_t)mv | 0x0100000000);

        /* copy at most `dn` characters */
        if (fv <= dn) {
            memcpy_p32(dp, sp, fv);
            return sp - ss + fv;
        } else {
            memcpy_p32(dp, sp, dn);
            return -(sp - ss + dn) - 1;
        }
    }

    /* clear upper half to avoid AVX-SSE transition penalty */
    _mm256_zeroupper();
#endif

    /* 16-byte loop, full store */
    while (nb >= 16 && dn >= 16) {
        __m128i vv = _mm_loadu_si128  ((const void *)sp);
        __m128i rv =  _mm_find_html   (vv);
                     _mm_storeu_si128 ((void *)dp, vv);

        /* check for matches */
        if ((mm = _mm_movemask_epi8(rv)) != 0) {
            return sp - ss + __builtin_ctz(mm);
        }

        /* move to next block */
        sp += 16;
        dp += 16;
        nb -= 16;
        dn -= 16;
    }

    /* 16-byte test, partial store */
    if (nb >= 16) {
        __m128i  vv = _mm_loadu_si128   ((const void *)sp);
        __m128i  rv =  _mm_find_html    (vv);
        uint32_t mv = _mm_movemask_epi8 (rv);
        uint32_t fv = __builtin_ctz     (mv | 0x010000);

        /* copy at most `dn` characters */
        if (fv <= dn) {
            memcpy_p16(dp, sp, fv);
            return sp - ss + fv;
        } else {
            memcpy_p16(dp, sp, dn);
            return -(sp - ss + dn) - 1;
        }
    }

    /* handle the remaining bytes with scalar code */
    while (nb > 0 && dn > 0) {
        if (*sp == '<' || *sp == '>' || *sp == '&' || *sp == '\xe2') {
            return sp - ss;
        } else {
            dn--, nb--;
            *dp++ = *sp++;
        }
    }

    /* check for dest buffer */
    if (nb == 0) {
        return sp - ss;
    } else {
        return -(sp - ss) - 1;
    }
}
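
/* memcchr_html_quote follows the same copy/return convention as memcchr_quote
 * above, but stops at '<', '>', '&' and at every 0xE2 lead byte. A stop on
 * 0xE2 does not necessarily mean U+2028/U+2029, so the caller presumably has
 * to inspect the next two bytes before deciding whether to escape or copy. */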

static always_inline long unescape(const char** src, const char* end, char* dp) {
    const char* sp = *src;
    long nb = end - sp;
    char cc = 0;
    uint32_t r0, r1;

    if (nb <= 0) return -ERR_EOF;

    if ((cc = _UnquoteTab[(uint8_t)sp[1]]) == 0) {
        *src += 1;
        return -ERR_ESCAPE;
    }

    if (cc != -1) {
        *dp = cc;
        *src += 2;
        return 1;
    }

    if (nb < 4) {
        *src += 1;
        return -ERR_EOF;
    }

    /* check for hexadecimal characters */
    if (!unhex16_is(sp + 2)) {
        *src += 2;
        return -ERR_INVAL;
    }

    /* decode the code-point */
    r0 = unhex16_fast(sp + 2);
    sp += 6;
    *src = sp;

    /* ASCII characters, unlikely */
    if (unlikely(r0 <= 0x7f)) {
        *dp++ = (char)r0;
        return 1;
    }

    /* 2-byte sequences (U+0080 .. U+07FF), unlikely */
    if (unlikely(r0 <= 0x07ff)) {
        *dp++ = (char)(0xc0 | (r0 >> 6));
        *dp++ = (char)(0x80 | (r0 & 0x3f));
        return 2;
    }

    /* 3-byte characters, likely */
    if (likely(r0 < 0xd800 || r0 > 0xdfff)) {
        *dp++ = (char)(0xe0 | ((r0 >> 12)       ));
        *dp++ = (char)(0x80 | ((r0 >>  6) & 0x3f));
        *dp++ = (char)(0x80 | ((r0      ) & 0x3f));
        return 3;
    }

    /* surrogate half, must be followed by the other half */
    if (nb < 6 || r0 > 0xdbff || sp[0] != '\\' || sp[1] != 'u') {
        return -ERR_UNICODE;
    }

    /* check the hexadecimal escape */
    if (!unhex16_is(sp + 2)) {
        *src += 2;
        return -ERR_INVAL;
    }

    /* decode the second code-point */
    r1 = unhex16_fast(sp + 2);

    /* it must be the other half */
    if (r1 < 0xdc00 || r1 > 0xdfff) {
        *src += 2;
        return -ERR_UNICODE;
    }

    /* merge two surrogates */
    r0 = (r0 - 0xd800) << 10;
    r1 = (r1 - 0xdc00) + 0x010000;
    r0 += r1;

    /* encode the character */
    *dp++ = (char)(0xf0 | ((r0 >> 18)       ));
    *dp++ = (char)(0x80 | ((r0 >> 12) & 0x3f));
    *dp++ = (char)(0x80 | ((r0 >>  6) & 0x3f));
    *dp++ = (char)(0x80 | ((r0      ) & 0x3f));
    *src = sp + 6;
    return 4;
}
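
/* unescape decodes a single escape sequence: *src is expected to point at the
 * leading backslash, the decoded bytes are written to `dp`, *src is advanced
 * past the consumed input, and the return value is the number of bytes written
 * (1-4) or a negative error (-ERR_EOF, -ERR_ESCAPE, -ERR_INVAL, -ERR_UNICODE).
 * Surrogate pairs (\uD800-\uDBFF followed by \uDC00-\uDFFF) are merged into one
 * 4-byte UTF-8 sequence. A hypothetical call for the two-byte input "\n"
 * (illustration only):
 *
 *     const char *s = "\\n", *p = s, *end = s + 2;
 *     char out[4];
 *     long n = unescape(&p, end, out);   // n == 1, out[0] == '\n', p == end
 */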