github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/native/parsing.c (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #include "native.h"
    18  #include "utils.h"
    19  #include <stdint.h>
    20  
    21  /** String Quoting **/
    22  #define MAX_ESCAPED_BYTES 8
    23  typedef struct {
    24      const long n;
    25      const char s[MAX_ESCAPED_BYTES];
    26  } quoted_t;
    27  
    28  static const quoted_t _SingleQuoteTab[256] = {
    29      ['\x00'] = { .n = 6, .s = "\\u0000" },
    30      ['\x01'] = { .n = 6, .s = "\\u0001" },
    31      ['\x02'] = { .n = 6, .s = "\\u0002" },
    32      ['\x03'] = { .n = 6, .s = "\\u0003" },
    33      ['\x04'] = { .n = 6, .s = "\\u0004" },
    34      ['\x05'] = { .n = 6, .s = "\\u0005" },
    35      ['\x06'] = { .n = 6, .s = "\\u0006" },
    36      ['\x07'] = { .n = 6, .s = "\\u0007" },
    37      ['\b'  ] = { .n = 6, .s = "\\u0008" },
    38      ['\t'  ] = { .n = 2, .s = "\\t"     },
    39      ['\n'  ] = { .n = 2, .s = "\\n"     },
    40      ['\x0b'] = { .n = 6, .s = "\\u000b" },
    41      ['\f'  ] = { .n = 6, .s = "\\u000c" },
    42      ['\r'  ] = { .n = 2, .s = "\\r"     },
    43      ['\x0e'] = { .n = 6, .s = "\\u000e" },
    44      ['\x0f'] = { .n = 6, .s = "\\u000f" },
    45      ['\x10'] = { .n = 6, .s = "\\u0010" },
    46      ['\x11'] = { .n = 6, .s = "\\u0011" },
    47      ['\x12'] = { .n = 6, .s = "\\u0012" },
    48      ['\x13'] = { .n = 6, .s = "\\u0013" },
    49      ['\x14'] = { .n = 6, .s = "\\u0014" },
    50      ['\x15'] = { .n = 6, .s = "\\u0015" },
    51      ['\x16'] = { .n = 6, .s = "\\u0016" },
    52      ['\x17'] = { .n = 6, .s = "\\u0017" },
    53      ['\x18'] = { .n = 6, .s = "\\u0018" },
    54      ['\x19'] = { .n = 6, .s = "\\u0019" },
    55      ['\x1a'] = { .n = 6, .s = "\\u001a" },
    56      ['\x1b'] = { .n = 6, .s = "\\u001b" },
    57      ['\x1c'] = { .n = 6, .s = "\\u001c" },
    58      ['\x1d'] = { .n = 6, .s = "\\u001d" },
    59      ['\x1e'] = { .n = 6, .s = "\\u001e" },
    60      ['\x1f'] = { .n = 6, .s = "\\u001f" },
    61      ['"'   ] = { .n = 2, .s = "\\\""    },
    62      ['\\'  ] = { .n = 2, .s = "\\\\"    },
    63  };
    64  
    65  static const quoted_t _DoubleQuoteTab[256] = {
    66      ['\x00'] = { .n = 7, .s = "\\\\u0000" },
    67      ['\x01'] = { .n = 7, .s = "\\\\u0001" },
    68      ['\x02'] = { .n = 7, .s = "\\\\u0002" },
    69      ['\x03'] = { .n = 7, .s = "\\\\u0003" },
    70      ['\x04'] = { .n = 7, .s = "\\\\u0004" },
    71      ['\x05'] = { .n = 7, .s = "\\\\u0005" },
    72      ['\x06'] = { .n = 7, .s = "\\\\u0006" },
    73      ['\x07'] = { .n = 7, .s = "\\\\u0007" },
    74      ['\b'  ] = { .n = 7, .s = "\\\\u0008" },
    75      ['\t'  ] = { .n = 3, .s = "\\\\t"     },
    76      ['\n'  ] = { .n = 3, .s = "\\\\n"     },
    77      ['\x0b'] = { .n = 7, .s = "\\\\u000b" },
    78      ['\f'  ] = { .n = 7, .s = "\\\\u000c" },
    79      ['\r'  ] = { .n = 3, .s = "\\\\r"     },
    80      ['\x0e'] = { .n = 7, .s = "\\\\u000e" },
    81      ['\x0f'] = { .n = 7, .s = "\\\\u000f" },
    82      ['\x10'] = { .n = 7, .s = "\\\\u0010" },
    83      ['\x11'] = { .n = 7, .s = "\\\\u0011" },
    84      ['\x12'] = { .n = 7, .s = "\\\\u0012" },
    85      ['\x13'] = { .n = 7, .s = "\\\\u0013" },
    86      ['\x14'] = { .n = 7, .s = "\\\\u0014" },
    87      ['\x15'] = { .n = 7, .s = "\\\\u0015" },
    88      ['\x16'] = { .n = 7, .s = "\\\\u0016" },
    89      ['\x17'] = { .n = 7, .s = "\\\\u0017" },
    90      ['\x18'] = { .n = 7, .s = "\\\\u0018" },
    91      ['\x19'] = { .n = 7, .s = "\\\\u0019" },
    92      ['\x1a'] = { .n = 7, .s = "\\\\u001a" },
    93      ['\x1b'] = { .n = 7, .s = "\\\\u001b" },
    94      ['\x1c'] = { .n = 7, .s = "\\\\u001c" },
    95      ['\x1d'] = { .n = 7, .s = "\\\\u001d" },
    96      ['\x1e'] = { .n = 7, .s = "\\\\u001e" },
    97      ['\x1f'] = { .n = 7, .s = "\\\\u001f" },
    98      ['"'   ] = { .n = 4, .s = "\\\\\\\""  },
    99      ['\\'  ] = { .n = 4, .s = "\\\\\\\\"  },
   100  };
   101  
   102  static const quoted_t _HtmlQuoteTab[256] = {
   103      ['<'] = { .n = 6, .s = "\\u003c" },
   104      ['>'] = { .n = 6, .s = "\\u003e" },
   105      ['&'] = { .n = 6, .s = "\\u0026" },
   106      // \u2028 and \u2029 is [E2 80 A8] and [E2 80 A9]
   107      [0xe2] = { .n = 0, .s = {0} },
   108      [0xa8] = { .n = 6, .s = "\\u2028" },
   109      [0xa9] = { .n = 6, .s = "\\u2029" },
   110  };
   111  
   112  static inline __m128i _mm_find_quote(__m128i vv) {
   113      __m128i e1 = _mm_cmpgt_epi8   (vv, _mm_set1_epi8(-1));
   114      __m128i e2 = _mm_cmpgt_epi8   (vv, _mm_set1_epi8(31));
   115      __m128i e3 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('"'));
   116      __m128i e4 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('\\'));
   117      __m128i r1 = _mm_andnot_si128 (e2, e1);
   118      __m128i r2 = _mm_or_si128     (e3, e4);
   119      __m128i rv = _mm_or_si128     (r1, r2);
   120      return rv;
   121  }
   122  
   123  #if USE_AVX2
   124  static inline __m256i _mm256_find_quote(__m256i vv) {
   125      __m256i e1 = _mm256_cmpgt_epi8   (vv, _mm256_set1_epi8(-1));
   126      __m256i e2 = _mm256_cmpgt_epi8   (vv, _mm256_set1_epi8(31));
   127      __m256i e3 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('"'));
   128      __m256i e4 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('\\'));
   129      __m256i r1 = _mm256_andnot_si256 (e2, e1);
   130      __m256i r2 = _mm256_or_si256     (e3, e4);
   131      __m256i rv = _mm256_or_si256     (r1, r2);
   132      return rv;
   133  }
   134  #endif
   135  
   136  static inline ssize_t memcchr_quote(const char *sp, ssize_t nb, char *dp, ssize_t dn) {
   137      uint32_t     mm;
   138      const char * ss = sp;
   139  
   140  #if USE_AVX2
   141      /* 32-byte loop, full store */
   142      while (nb >= 32 && dn >= 32) {
   143          __m256i vv = _mm256_loadu_si256  ((const void *)sp);
   144          __m256i rv = _mm256_find_quote   (vv);
   145                       _mm256_storeu_si256 ((void *)dp, vv);
   146  
   147          /* check for matches */
   148          if ((mm = _mm256_movemask_epi8(rv)) != 0) {
   149              return sp - ss + __builtin_ctz(mm);
   150          }
   151  
   152          /* move to next block */
   153          sp += 32;
   154          dp += 32;
   155          nb -= 32;
   156          dn -= 32;
   157      }
   158  
   159      /* 32-byte test, partial store */
   160      if (nb >= 32) {
   161          __m256i  vv = _mm256_loadu_si256   ((const void *)sp);
   162          __m256i  rv = _mm256_find_quote    (vv);
   163          uint32_t mv = _mm256_movemask_epi8 (rv);
   164          uint32_t fv = __builtin_ctzll      ((uint64_t)mv | 0x0100000000);
   165  
   166          /* copy at most `dn` characters */
   167          if (fv <= dn) {
   168              memcpy_p32(dp, sp, fv);
   169              return sp - ss + fv;
   170          } else {
   171              memcpy_p32(dp, sp, dn);
   172              return -(sp - ss + dn) - 1;
   173          }
   174      }
   175  
   176      /* clear upper half to avoid AVX-SSE transition penalty */
   177      _mm256_zeroupper();
   178  #endif
   179  
   180      /* 16-byte loop, full store */
   181      while (nb >= 16 && dn >= 16) {
   182          __m128i vv = _mm_loadu_si128  ((const void *)sp);
   183          __m128i rv = _mm_find_quote   (vv);
   184                       _mm_storeu_si128 ((void *)dp, vv);
   185  
   186          /* check for matches */
   187          if ((mm = _mm_movemask_epi8(rv)) != 0) {
   188              return sp - ss + __builtin_ctz(mm);
   189          }
   190  
   191          /* move to next block */
   192          sp += 16;
   193          dp += 16;
   194          nb -= 16;
   195          dn -= 16;
   196      }
   197  
   198      /* 16-byte test, partial store */
   199      if (nb >= 16) {
   200          __m128i  vv = _mm_loadu_si128   ((const void *)sp);
   201          __m128i  rv = _mm_find_quote    (vv);
   202          uint32_t mv = _mm_movemask_epi8 (rv);
   203          uint32_t fv = __builtin_ctz     (mv | 0x010000);
   204  
   205          /* copy at most `dn` characters */
   206          if (fv <= dn) {
   207              memcpy_p16(dp, sp, fv);
   208              return sp - ss + fv;
   209          } else {
   210              memcpy_p16(dp, sp, dn);
   211              return -(sp - ss + dn) - 1;
   212          }
   213      }
   214  
   215      /* handle the remaining bytes with scalar code */
   216      while (nb > 0 && dn > 0) {
   217          if (_SingleQuoteTab[*(uint8_t *)sp].n) {
   218              return sp - ss;
   219          } else {
   220              dn--, nb--;
   221              *dp++ = *sp++;
   222          }
   223      }
   224  
   225      /* check for dest buffer */
   226      if (nb == 0) {
   227          return sp - ss;
   228      } else {
   229          return -(sp - ss) - 1;
   230      }
   231  }
   232  
   233  static const bool _EscTab[256] = {
   234      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0F
   235      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F
   236      //   '"'
   237      0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2F
   238      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3F
   239      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4F
   240      //                                 '""
   241      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50-0x5F
   242      // 0x60-0xFF are zeroes
   243  };
   244  
   245  static inline uint8_t escape_mask4(const char *sp) {
   246      return _EscTab[*(uint8_t *)(sp)] | (_EscTab[*(uint8_t *)(sp + 1)] << 1) | (_EscTab[*(uint8_t *)(sp + 2)] << 2) | (_EscTab[*(uint8_t *)(sp + 3)]  << 3);
   247  }
   248  
   249  static inline ssize_t memcchr_quote_unsafe(const char *sp, ssize_t nb, char *dp, const quoted_t * tab) {
   250      uint32_t     mm;
   251      const char * ds = dp;
   252      size_t cn = 0;
   253  
   254  simd_copy:
   255  
   256      if (nb < 16) goto scalar_copy;
   257  
   258  #if USE_AVX2
   259      /* 32-byte loop, full store */
   260      while (nb >= 32) {
   261          __m256i vv = _mm256_loadu_si256  ((const void *)sp);
   262          __m256i rv = _mm256_find_quote   (vv);
   263                       _mm256_storeu_si256 ((void *)dp, vv);
   264  
   265          /* check for matches */
   266          if ((mm = _mm256_movemask_epi8(rv)) != 0) {
   267              cn = __builtin_ctz(mm);
   268              sp += cn;
   269              nb -= cn;
   270              dp += cn;
   271              goto escape;
   272          }
   273  
   274          /* move to next block */
   275          sp += 32;
   276          dp += 32;
   277          nb -= 32;
   278      }
   279  
   280      /* clear upper half to avoid AVX-SSE transition penalty */
   281      _mm256_zeroupper();
   282  #endif
   283  
   284      /* 16-byte loop, full store */
   285      while (nb >= 16) {
   286          __m128i vv = _mm_loadu_si128  ((const void *)sp);
   287          __m128i rv = _mm_find_quote   (vv);
   288                       _mm_storeu_si128 ((void *)dp, vv);
   289  
   290          /* check for matches */
   291          if ((mm = _mm_movemask_epi8(rv)) != 0) {
   292              cn =  __builtin_ctz(mm);
   293              sp += cn;
   294              nb -= cn;
   295              dp += cn;
   296              goto escape;
   297          }
   298  
   299          /* move to next block */
   300          sp += 16;
   301          dp += 16;
   302          nb -= 16;
   303      }
   304  
   305      /* handle the remaining bytes with scalar code */
   306      // while (nb > 0) {
   307      //     if (_EscTab[*(uint8_t *)sp]) {
   308      //         goto escape;
   309      //     } else {
   310      //         nb--;
   311      //         *dp++ = *sp++;
   312      //     }
   313      // }
   314      // optimize: loop unrolling here
   315  
   316  scalar_copy:
   317      if (nb >= 8) {
   318          uint8_t mask1 = escape_mask4(sp);
   319          *(uint64_t *)dp = *(const uint64_t *)sp;
   320          if (unlikely(mask1)) {
   321              cn =  __builtin_ctz(mask1);
   322              sp += cn;
   323              nb -= cn;
   324              dp += cn;
   325              goto escape;
   326          }
   327          uint8_t mask2 = escape_mask4(sp + 4);
   328          if (unlikely(mask2)) {
   329              cn =  __builtin_ctz(mask2);
   330              sp += cn + 4;
   331              nb -= cn + 4;
   332              dp += cn + 4;
   333              goto escape;
   334          }
   335          dp += 8, sp += 8, nb -= 8;
   336      }
   337  
   338      if (nb >= 4) {
   339          uint8_t mask2 = escape_mask4(sp);
   340          *(uint32_t *)dp = *(const uint32_t *)sp;
   341          if (unlikely(mask2)) {
   342              cn =  __builtin_ctz(mask2);
   343              sp += cn;
   344              nb -= cn;
   345              dp += cn;
   346              goto escape;
   347          }
   348          dp += 4, sp += 4, nb -= 4;
   349      }
   350  
   351      while (nb > 0) {
   352          if (unlikely(_EscTab[*(uint8_t *)(sp)])) goto escape;
   353          *dp++ = *sp++, nb--;
   354      }
   355      /* all quote done */
   356      return dp - ds;
   357  escape:
   358       /* get the escape entry, handle consecutive quotes */
   359       do {
   360          uint8_t ch = *(uint8_t *)sp;
   361          int nc = tab[ch].n;
   362          /* copy the quoted value.
   363           * Note: dp always has at least 8 bytes (MAX_ESCAPED_BYTES) here.
   364           * so, we not use memcpy_p8(dp, tab[ch].s, nc);
   365           */
   366          *(uint64_t *)dp = *(const uint64_t *)tab[ch].s;
   367          sp++;
   368          nb--;
   369          dp += nc;
   370          if (nb <= 0) break;
   371          /* copy and find escape chars */
   372          if (_EscTab[*(uint8_t *)(sp)] == 0) {
   373              goto simd_copy;
   374          }
   375      } while (true);
   376      return dp - ds;
   377  }
   378  
   379  ssize_t quote(const char *sp, ssize_t nb, char *dp, ssize_t *dn, uint64_t flags) {
   380      ssize_t          nd = *dn;
   381      const char *     ds = dp;
   382      const char *     ss = sp;
   383      const quoted_t * tab;
   384  
   385      /* select quoting table */
   386      if (!(flags & F_DBLUNQ)) {
   387          tab = _SingleQuoteTab;
   388      } else {
   389          tab = _DoubleQuoteTab;
   390      }
   391  
   392      if (*dn >= nb * MAX_ESCAPED_BYTES) {
   393          *dn = memcchr_quote_unsafe(sp, nb, dp, tab);
   394          return nb;
   395      }
   396  
   397      /* find the special characters, copy on the fly */
   398      while (nb != 0) {
   399          int     nc;
   400          uint8_t ch;
   401          ssize_t rb = memcchr_quote(sp, nb, dp, nd);
   402  
   403          /* not enough buffer space */
   404          if (rb < 0) {
   405              *dn = dp - ds - rb - 1;
   406              return -(sp - ss - rb - 1) - 1;
   407          }
   408  
   409          /* skip already copied bytes */
   410          sp += rb;
   411          dp += rb;
   412          nb -= rb;
   413          nd -= rb;
   414  
   415          /* get the escape entry, handle consecutive quotes */
   416          while (nb != 0) {
   417              ch = *(uint8_t *)sp;
   418              nc = tab[ch].n;
   419  
   420              /* check for escape character */
   421              if (nc == 0) {
   422                  break;
   423              }
   424  
   425              /* check for buffer space */
   426              if (nc > nd) {
   427                  *dn = dp - ds;
   428                  return -(sp - ss) - 1;
   429              }
   430  
   431              /* copy the quoted value */
   432              memcpy_p8(dp, tab[ch].s, nc);
   433              sp++;
   434              nb--;
   435              dp += nc;
   436              nd -= nc;
   437          }
   438      }
   439  
   440      /* all done */
   441      *dn = dp - ds;
   442      return sp - ss;
   443  }
   444  
   445  /** String Unquoting **/
   446  
   447  static const char _UnquoteTab[256] = {
   448      ['/' ] = '/',
   449      ['"' ] = '"',
   450      ['b' ] = '\b',
   451      ['f' ] = '\f',
   452      ['n' ] = '\n',
   453      ['r' ] = '\r',
   454      ['t' ] = '\t',
   455      ['u' ] = -1,
   456      ['\\'] = '\\',
   457  };
   458  
   459  static inline ssize_t memcchr_p32(const char *s, ssize_t nb, char *p) {
   460      int64_t      r;
   461      ssize_t      n = nb;
   462      const char * q = s;
   463  
   464  #if USE_AVX2
   465      __m256i u;
   466      __m256i v;
   467      __m256i b = _mm256_set1_epi8('\\');
   468  
   469      /* process every 32 bytes */
   470      while (n >= 32) {
   471          u = _mm256_loadu_si256  ((const void *)s);
   472          v = _mm256_cmpeq_epi8   (u, b);
   473              _mm256_storeu_si256 ((void *)p, u);
   474  
   475          /* check for matches */
   476          if ((r = _mm256_movemask_epi8(v)) != 0) {
   477              return s - q + __builtin_ctzll(r);
   478          }
   479  
   480          /* move to the next 32 bytes */
   481          s += 32;
   482          p += 32;
   483          n -= 32;
   484      }
   485  
   486      /* clear upper half to avoid AVX-SSE transition penalty */
   487      _mm256_zeroupper();
   488  #endif
   489  
   490      /* initialze with '\\' */
   491      __m128i x;
   492      __m128i y;
   493      __m128i a = _mm_set1_epi8('\\');
   494  
   495      /* process every 16 bytes */
   496      while (n >= 16) {
   497          x = _mm_loadu_si128  ((const void *)s);
   498          y = _mm_cmpeq_epi8   (x, a);
   499              _mm_storeu_si128 ((void *)p, x);
   500  
   501          /* check for matches */
   502          if ((r = _mm_movemask_epi8(y)) != 0) {
   503              return s - q + __builtin_ctzll(r);
   504          }
   505  
   506          /* move to the next 16 bytes */
   507          s += 16;
   508          p += 16;
   509          n -= 16;
   510      }
   511  
   512      /* remaining bytes, do with scalar code */
   513      while (n--) {
   514          if (*s != '\\') {
   515              *p++ = *s++;
   516          } else {
   517              return s - q;
   518          }
   519      }
   520  
   521      /* nothing found, but everything was copied */
   522      return -1;
   523  }
   524  
   525  #define ALL_01h     (~0ul / 255)
   526  #define ALL_7fh     (ALL_01h * 127)
   527  #define ALL_80h     (ALL_01h * 128)
   528  
   529  static inline uint32_t hasless(uint32_t x, uint8_t n) {
   530      return (x - ALL_01h * n) & ~x & ALL_80h;
   531  }
   532  
   533  static inline uint32_t hasmore(uint32_t x, uint8_t n) {
   534      return (x + ALL_01h * (127 - n) | x) & ALL_80h;
   535  }
   536  
   537  static inline uint32_t hasbetween(uint32_t x, uint8_t m, uint8_t n) {
   538      return (ALL_01h * (127 + n) - (x & ALL_7fh) & ~x & (x & ALL_7fh) + ALL_01h * (127 - m)) & ALL_80h;
   539  }
   540  
   541  #undef ALL_01h
   542  #undef ALL_7fh
   543  #undef ALL_80h
   544  
   545  static inline char ishex(char c) {
   546      return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
   547  }
   548  
   549  static inline void unirep(char **dp) {
   550      *(*dp)++ = 0xef;
   551      *(*dp)++ = 0xbf;
   552      *(*dp)++ = 0xbd;
   553  }
   554  
   555  static inline char unhex16_is(const char *s) {
   556      uint32_t v = *(uint32_t *)s;
   557      return !(hasless(v, '0') || hasmore(v, 'f') || hasbetween(v, '9', 'A') || hasbetween(v, 'F', 'a'));
   558  }
   559  
   560  static inline uint32_t unhex16_fast(const char *s) {
   561      uint32_t a = __builtin_bswap32(*(uint32_t *)s);
   562      uint32_t b = 9 * ((~a & 0x10101010) >> 4) + (a & 0x0f0f0f0f);
   563      uint32_t c = (b >> 4) | b;
   564      uint32_t d = ((c >> 8) & 0xff00) | (c & 0x00ff);
   565      return d;
   566  }
   567  
   568  ssize_t unquote(const char *sp, ssize_t nb, char *dp, ssize_t *ep, uint64_t flags) {
   569      ssize_t      n;
   570      ssize_t      x = nb;
   571      const char * s = sp;
   572      const char * p = dp;
   573  
   574      /* scan & copy all the non-escape characters */
   575      while (nb && (n = (*sp == '\\' ? 0 : memcchr_p32(sp, nb, dp))) != -1) {
   576          char     cc;
   577          uint32_t r0;
   578          uint32_t r1;
   579  
   580          /* skip the plain text */
   581          dp += n;
   582          sp += n + 2;
   583          nb -= n + 2;
   584  
   585          /* check for EOF */
   586          if (nb < 0) {
   587              *ep = x;
   588              return -ERR_EOF;
   589          }
   590  
   591          /* check for double unquote */
   592          if (unlikely(flags & F_DBLUNQ)) {
   593              int  nr = nb;
   594              char c1 = sp[-1];
   595  
   596              /* must have at least 1 character left */
   597              if (nr == 0) {
   598                  *ep = x;
   599                  return -ERR_EOF;
   600              }
   601  
   602              /* every quote must be a double quote */
   603              if (c1 != '\\') {
   604                  *ep = sp - s - 1;
   605                  return -ERR_INVAL;
   606              }
   607  
   608              /* special case of '\\\\' and '\\\"' */
   609              if (*sp == '\\') {
   610                  if (nr < 2) {
   611                      *ep = x;
   612                      return -ERR_EOF;
   613                  } else if (sp[1] != '"' && sp[1] != '\\') {
   614                      *ep = sp - s + 1;
   615                      return -ERR_INVAL;
   616                  } else {
   617                      sp++;
   618                      nb--;
   619                  }
   620              }
   621  
   622              /* skip the second escape */
   623              sp++;
   624              nb--;
   625          }
   626  
   627          /* check for escape sequence */
   628          if ((cc = _UnquoteTab[(uint8_t)sp[-1]]) == 0) {
   629              *ep = sp - s - 1;
   630              return -ERR_ESCAPE;
   631          }
   632  
   633          /* check for simple escape sequence */
   634          if (cc != -1) {
   635              *dp++ = cc;
   636              continue;
   637          }
   638  
   639          /* must have at least 4 characters */
   640          if (nb < 4) {
   641              *ep = x;
   642              return -ERR_EOF;
   643          }
   644  
   645          /* check for hexadecimal characters */
   646          if (!unhex16_is(sp)) {
   647              *ep = sp - s;
   648              for (int i = 0; i < 4 && ishex(*sp); i++, sp++) ++*ep;
   649              return -ERR_INVAL;
   650          }
   651  
   652          /* decode the code-point */
   653          r0 = unhex16_fast(sp);
   654          sp += 4;
   655          nb -= 4;
   656  
   657      /* from line 598 */
   658      retry_decode:
   659  
   660          /* ASCII characters, unlikely */
   661          if (unlikely(r0 <= 0x7f)) {
   662              *dp++ = (char)r0;
   663              continue;
   664          }
   665  
   666          /* latin-1 characters, unlikely */
   667          if (unlikely(r0 <= 0x07ff)) {
   668              *dp++ = (char)(0xc0 | (r0 >> 6));
   669              *dp++ = (char)(0x80 | (r0 & 0x3f));
   670              continue;
   671          }
   672  
   673          /* 3-byte characters, likely */
   674          if (likely(r0 < 0xd800 || r0 > 0xdfff)) {
   675              *dp++ = (char)(0xe0 | ((r0 >> 12)       ));
   676              *dp++ = (char)(0x80 | ((r0 >>  6) & 0x3f));
   677              *dp++ = (char)(0x80 | ((r0      ) & 0x3f));
   678              continue;
   679          }
   680  
   681          /* check for double unquote */
   682          if (unlikely(flags & F_DBLUNQ)) {
   683              if (nb < 1) {
   684                  if (likely(flags & F_UNIREP)) {
   685                      unirep(&dp);
   686                      continue;
   687                  } else {
   688                      *ep = x;
   689                      return -ERR_EOF;
   690                  }
   691              } else {
   692                  if (sp[0] == '\\') {
   693                      nb--;
   694                      sp++;
   695                  } else if (likely(flags & F_UNIREP)) {
   696                      unirep(&dp);
   697                      continue;
   698                  } else {
   699                      *ep = sp - s - 4;
   700                      return -ERR_UNICODE;
   701                  }
   702              }
   703          }
   704  
   705          /* surrogate half, must follows by the other half */
   706          if (nb < 6 || r0 > 0xdbff || sp[0] != '\\' || sp[1] != 'u') {
   707              if (likely(flags & F_UNIREP)) {
   708                  unirep(&dp);
   709                  continue;
   710              } else {
   711                  *ep = sp - s - ((flags & F_DBLUNQ) ? 5 : 4);
   712                  return -ERR_UNICODE;
   713              }
   714          }
   715  
   716          /* check the hexadecimal escape */
   717          if (!unhex16_is(sp + 2)) {
   718              *ep = sp - s + 2;
   719              for (int i = 2; i < 6 && ishex(sp[i]); i++) ++*ep;
   720              return -ERR_INVAL;
   721          }
   722  
   723          /* decode the second code-point */
   724          r1 = unhex16_fast(sp + 2);
   725          sp += 6;
   726          nb -= 6;
   727  
   728          /* it must be the other half */
   729          if (r1 < 0xdc00 || r1 > 0xdfff) {
   730              if (unlikely(!(flags & F_UNIREP))) {
   731                  *ep = sp - s - 4;
   732                  return -ERR_UNICODE;
   733              } else {
   734                  r0 = r1;
   735                  unirep(&dp);
   736                  goto retry_decode;
   737              }
   738          }
   739  
   740          /* merge two surrogates */
   741          r0 = (r0 - 0xd800) << 10;
   742          r1 = (r1 - 0xdc00) + 0x010000;
   743          r0 += r1;
   744  
   745          /* check the code point range */
   746          if (r0 > 0x10ffff) {
   747              if (likely(!(flags & F_UNIREP))) {
   748                  *ep = sp - s - 4;
   749                  return -ERR_UNICODE;
   750              } else {
   751                  unirep(&dp);
   752                  continue;
   753              }
   754          }
   755  
   756          /* encode the character */
   757          *dp++ = (char)(0xf0 | ((r0 >> 18)       ));
   758          *dp++ = (char)(0x80 | ((r0 >> 12) & 0x3f));
   759          *dp++ = (char)(0x80 | ((r0 >>  6) & 0x3f));
   760          *dp++ = (char)(0x80 | ((r0      ) & 0x3f));
   761      }
   762  
   763      /* calculate the result length */
   764      return dp + nb - p;
   765  }
   766  
   767  static inline __m128i _mm_find_html(__m128i vv) {
   768      __m128i e1 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('<'));
   769      __m128i e2 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('>'));
   770      __m128i e3 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('&'));
   771      __m128i e4 = _mm_cmpeq_epi8   (vv, _mm_set1_epi8('\xe2'));
   772      __m128i r1 = _mm_or_si128     (e1, e2);
   773      __m128i r2 = _mm_or_si128     (e3, e4);
   774      __m128i rv = _mm_or_si128     (r1, r2);
   775      return rv;
   776  }
   777  
   778  #if USE_AVX2
   779  static inline __m256i _mm256_find_html(__m256i vv) {
   780      __m256i e1 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('<'));
   781      __m256i e2 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('>'));
   782      __m256i e3 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('&'));
   783      __m256i e4 = _mm256_cmpeq_epi8   (vv, _mm256_set1_epi8('\xe2'));
   784      __m256i r1 = _mm256_or_si256     (e1, e2);
   785      __m256i r2 = _mm256_or_si256     (e3, e4);
   786      __m256i rv = _mm256_or_si256     (r1, r2);
   787      return rv;
   788  }
   789  #endif
   790  
   791  static inline ssize_t memcchr_html_quote(const char *sp, ssize_t nb, char *dp, ssize_t dn) {
   792      uint32_t     mm;
   793      const char * ss = sp;
   794  
   795  #if USE_AVX2
   796      /* 32-byte loop, full store */
   797      while (nb >= 32 && dn >= 32) {
   798          __m256i vv = _mm256_loadu_si256  ((const void *)sp);
   799          __m256i rv = _mm256_find_html    (vv);
   800                       _mm256_storeu_si256 ((void *)dp, vv);
   801  
   802          /* check for matches */
   803          if ((mm = _mm256_movemask_epi8(rv)) != 0) {
   804              return sp - ss + __builtin_ctz(mm);
   805          }
   806  
   807          /* move to next block */
   808          sp += 32;
   809          dp += 32;
   810          nb -= 32;
   811          dn -= 32;
   812      }
   813  
   814      /* 32-byte test, partial store */
   815      if (nb >= 32) {
   816          __m256i  vv = _mm256_loadu_si256   ((const void *)sp);
   817          __m256i  rv = _mm256_find_html     (vv);
   818          uint32_t mv = _mm256_movemask_epi8 (rv);
   819          uint32_t fv = __builtin_ctzll      ((uint64_t)mv | 0x0100000000);
   820  
   821          /* copy at most `dn` characters */
   822          if (fv <= dn) {
   823              memcpy_p32(dp, sp, fv);
   824              return sp - ss + fv;
   825          } else {
   826              memcpy_p32(dp, sp, dn);
   827              return -(sp - ss + dn) - 1;
   828          }
   829      }
   830  
   831      /* clear upper half to avoid AVX-SSE transition penalty */
   832      _mm256_zeroupper();
   833  #endif
   834  
   835      /* 16-byte loop, full store */
   836      while (nb >= 16 && dn >= 16) {
   837          __m128i vv = _mm_loadu_si128  ((const void *)sp);
   838          __m128i rv =  _mm_find_html   (vv);
   839                       _mm_storeu_si128 ((void *)dp, vv);
   840  
   841          /* check for matches */
   842          if ((mm = _mm_movemask_epi8(rv)) != 0) {
   843              return sp - ss + __builtin_ctz(mm);
   844          }
   845  
   846          /* move to next block */
   847          sp += 16;
   848          dp += 16;
   849          nb -= 16;
   850          dn -= 16;
   851      }
   852  
   853      /* 16-byte test, partial store */
   854      if (nb >= 16) {
   855          __m128i  vv = _mm_loadu_si128   ((const void *)sp);
   856          __m128i  rv =  _mm_find_html    (vv);
   857          uint32_t mv = _mm_movemask_epi8 (rv);
   858          uint32_t fv = __builtin_ctz     (mv | 0x010000);
   859  
   860          /* copy at most `dn` characters */
   861          if (fv <= dn) {
   862              memcpy_p16(dp, sp, fv);
   863              return sp - ss + fv;
   864          } else {
   865              memcpy_p16(dp, sp, dn);
   866              return -(sp - ss + dn) - 1;
   867          }
   868      }
   869  
   870      /* handle the remaining bytes with scalar code */
   871      while (nb > 0 && dn > 0) {
   872          if (*sp == '<' || *sp == '>' || *sp == '&' || *sp == '\xe2') {
   873              return sp - ss;
   874          } else {
   875              dn--, nb--;
   876              *dp++ = *sp++;
   877          }
   878      }
   879  
   880      /* check for dest buffer */
   881      if (nb == 0) {
   882          return sp - ss;
   883      } else {
   884          return -(sp - ss) - 1;
   885      }
   886  }
   887  
   888  ssize_t html_escape(const char *sp, ssize_t nb, char *dp, ssize_t *dn) {
   889      ssize_t          nd  = *dn;
   890      const char     * ds  = dp;
   891      const char     * ss  = sp;
   892      const quoted_t * tab = _HtmlQuoteTab;
   893  
   894      /* find the special characters, copy on the fly */
   895      while (nb > 0) {
   896          int     nc = 0;
   897          uint8_t ch = 0;
   898          ssize_t rb = 0;
   899          const char * cur = 0;
   900  
   901          /* not enough buffer space */
   902          if (nd <= 0) {
   903              return -(sp - ss) - 1;
   904          }
   905  
   906          /* find and copy */
   907          if ((rb = memcchr_html_quote(sp, nb, dp, nd)) < 0) {
   908              *dn = dp - ds - rb - 1;
   909              return -(sp - ss - rb - 1) - 1;
   910          }
   911  
   912          /* skip already copied bytes */
   913          sp += rb;
   914          dp += rb;
   915          nb -= rb;
   916          nd -= rb;
   917  
   918          /* stop if already finished */
   919          if (nb <= 0) {
   920              break;
   921          }
   922  
   923          /* mark cur postion */
   924          cur = sp;
   925  
   926          /* check for \u2028 and \u2029, binary is \xe2\x80\xa8 and \xe2\x80\xa9 */
   927          if (unlikely(*sp == '\xe2')) {
   928              if (nb >= 3 && *(sp+1) == '\x80' && (*(sp+2) == '\xa8' || *(sp+2) == '\xa9')) {
   929                  sp += 2, nb -= 2;
   930              } else if (nd > 0) {
   931                  *dp++ = *sp++;
   932                  nb--, nd--;
   933                  continue;
   934              } else {
   935                  return -(sp - ss) - 1;
   936              }
   937          }
   938  
   939          /* get the escape entry, handle consecutive quotes */
   940          ch = * (uint8_t*) sp;
   941          nc = tab[ch].n;
   942  
   943  
   944          /* check for buffer space */
   945          if (nd < nc) {
   946              *dn = dp - ds;
   947              return -(cur - ss) - 1;
   948          }
   949  
   950          /* copy the quoted value */
   951          memcpy_p8(dp, tab[ch].s, nc);
   952          sp++;
   953          nb--;
   954          dp += nc;
   955          nd -= nc;
   956      }
   957  
   958      /* all done */
   959      *dn = dp - ds;
   960      return sp - ss;
   961  }
   962  
   963  #undef MAX_ESCAPED_BYTES
   964  
   965  static inline long unescape(const char** src, const char* end, char* dp) {
   966      const char* sp = *src;
   967      long nb = end - sp;
   968      char cc = 0;
   969      uint32_t r0, r1;
   970  
   971      if (nb <= 0) return -ERR_EOF;
   972  
   973      if ((cc = _UnquoteTab[(uint8_t)sp[1]]) == 0) {
   974          *src += 1;
   975          return -ERR_ESCAPE;
   976      }
   977  
   978      if (cc != -1) {
   979          *dp = cc;
   980          *src += 2;
   981          return 1;
   982      }
   983  
   984      if (nb < 4) {
   985          *src += 1;
   986          return -ERR_EOF;
   987      }
   988  
   989      /* check for hexadecimal characters */
   990      if (!unhex16_is(sp + 2)) {
   991          *src += 2;
   992          return -ERR_INVAL;
   993      }
   994  
   995      /* decode the code-point */
   996      r0 = unhex16_fast(sp + 2);
   997      sp += 6;
   998      *src = sp;
   999  
  1000      /* ASCII characters, unlikely */
  1001      if (unlikely(r0 <= 0x7f)) {
  1002          *dp++ = (char)r0;
  1003          return 1;
  1004      }
  1005  
  1006      /* latin-1 characters, unlikely */
  1007      if (unlikely(r0 <= 0x07ff)) {
  1008          *dp++ = (char)(0xc0 | (r0 >> 6));
  1009          *dp++ = (char)(0x80 | (r0 & 0x3f));
  1010          return 2;
  1011      }
  1012  
  1013      /* 3-byte characters, likely */
  1014      if (likely(r0 < 0xd800 || r0 > 0xdfff)) {
  1015          *dp++ = (char)(0xe0 | ((r0 >> 12)       ));
  1016          *dp++ = (char)(0x80 | ((r0 >>  6) & 0x3f));
  1017          *dp++ = (char)(0x80 | ((r0      ) & 0x3f));
  1018          return 3;
  1019      }
  1020  
  1021      /* surrogate half, must follows by the other half */
  1022      if (nb < 6 || r0 > 0xdbff || sp[0] != '\\' || sp[1] != 'u') {
  1023          return -ERR_UNICODE;
  1024      }
  1025  
  1026      /* check the hexadecimal escape */
  1027      if (!unhex16_is(sp + 2)) {
  1028          *src += 2;
  1029          return -ERR_INVAL;
  1030      }
  1031  
  1032      /* decode the second code-point */
  1033      r1 = unhex16_fast(sp + 2);
  1034  
  1035      /* it must be the other half */
  1036      if (r1 < 0xdc00 || r1 > 0xdfff) {
  1037          *src += 2;
  1038          return -ERR_UNICODE;
  1039      }
  1040  
  1041      /* merge two surrogates */
  1042      r0 = (r0 - 0xd800) << 10;
  1043      r1 = (r1 - 0xdc00) + 0x010000;
  1044      r0 += r1;
  1045  
  1046      /* encode the character */
  1047      *dp++ = (char)(0xf0 | ((r0 >> 18)       ));
  1048      *dp++ = (char)(0x80 | ((r0 >> 12) & 0x3f));
  1049      *dp++ = (char)(0x80 | ((r0 >>  6) & 0x3f));
  1050      *dp++ = (char)(0x80 | ((r0      ) & 0x3f));
  1051      *src = sp + 6;
  1052      return 4;
  1053  }