github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/html_escape.c (about)

     1  
     2  #include "parsing.h"
     3  
     4  ssize_t html_escape(const char *sp, ssize_t nb, char *dp, ssize_t *dn) {
     5      ssize_t          nd  = *dn;
     6      const char     * ds  = dp;
     7      const char     * ss  = sp;
     8      const quoted_t * tab = _HtmlQuoteTab;
     9  
    10      /* find the special characters, copy on the fly */
    11      while (nb > 0) {
    12          int     nc = 0;
    13          uint8_t ch = 0;
    14          ssize_t rb = 0;
    15          const char * cur = 0;
    16  
    17          /* not enough buffer space */
    18          if (nd <= 0) {
    19              return -(sp - ss) - 1;
    20          }
    21  
    22          /* find and copy */
    23          if ((rb = memcchr_html_quote(sp, nb, dp, nd)) < 0) {
    24              *dn = dp - ds - rb - 1;
    25              return -(sp - ss - rb - 1) - 1;
    26          }
    27  
    28          /* skip already copied bytes */
    29          sp += rb;
    30          dp += rb;
    31          nb -= rb;
    32          nd -= rb;
    33  
    34          /* stop if already finished */
    35          if (nb <= 0) {
    36              break;
    37          }
    38  
    39          /* mark cur postion */
    40          cur = sp;
    41  
    42          /* check for \u2028 and \u2029, binary is \xe2\x80\xa8 and \xe2\x80\xa9 */
    43          if (unlikely(*sp == '\xe2')) {
    44              if (nb >= 3 && *(sp+1) == '\x80' && (*(sp+2) == '\xa8' || *(sp+2) == '\xa9')) {
    45                  sp += 2, nb -= 2;
    46              } else if (nd > 0) {
    47                  *dp++ = *sp++;
    48                  nb--, nd--;
    49                  continue;
    50              } else {
    51                  return -(sp - ss) - 1;
    52              }
    53          }
    54  
    55          /* get the escape entry, handle consecutive quotes */
    56          ch = * (uint8_t*) sp;
    57          nc = tab[ch].n;
    58  
    59  
    60          /* check for buffer space */
    61          if (nd < nc) {
    62              *dn = dp - ds;
    63              return -(cur - ss) - 1;
    64          }
    65  
    66          /* copy the quoted value */
    67          memcpy_p8(dp, tab[ch].s, nc);
    68          sp++;
    69          nb--;
    70          dp += nc;
    71          nd -= nc;
    72      }
    73  
    74      /* all done */
    75      *dn = dp - ds;
    76      return sp - ss;
    77  }