github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/html_escape.c (about) 1 2 #include "parsing.h" 3 4 ssize_t html_escape(const char *sp, ssize_t nb, char *dp, ssize_t *dn) { 5 ssize_t nd = *dn; 6 const char * ds = dp; 7 const char * ss = sp; 8 const quoted_t * tab = _HtmlQuoteTab; 9 10 /* find the special characters, copy on the fly */ 11 while (nb > 0) { 12 int nc = 0; 13 uint8_t ch = 0; 14 ssize_t rb = 0; 15 const char * cur = 0; 16 17 /* not enough buffer space */ 18 if (nd <= 0) { 19 return -(sp - ss) - 1; 20 } 21 22 /* find and copy */ 23 if ((rb = memcchr_html_quote(sp, nb, dp, nd)) < 0) { 24 *dn = dp - ds - rb - 1; 25 return -(sp - ss - rb - 1) - 1; 26 } 27 28 /* skip already copied bytes */ 29 sp += rb; 30 dp += rb; 31 nb -= rb; 32 nd -= rb; 33 34 /* stop if already finished */ 35 if (nb <= 0) { 36 break; 37 } 38 39 /* mark cur postion */ 40 cur = sp; 41 42 /* check for \u2028 and \u2029, binary is \xe2\x80\xa8 and \xe2\x80\xa9 */ 43 if (unlikely(*sp == '\xe2')) { 44 if (nb >= 3 && *(sp+1) == '\x80' && (*(sp+2) == '\xa8' || *(sp+2) == '\xa9')) { 45 sp += 2, nb -= 2; 46 } else if (nd > 0) { 47 *dp++ = *sp++; 48 nb--, nd--; 49 continue; 50 } else { 51 return -(sp - ss) - 1; 52 } 53 } 54 55 /* get the escape entry, handle consecutive quotes */ 56 ch = * (uint8_t*) sp; 57 nc = tab[ch].n; 58 59 60 /* check for buffer space */ 61 if (nd < nc) { 62 *dn = dp - ds; 63 return -(cur - ss) - 1; 64 } 65 66 /* copy the quoted value */ 67 memcpy_p8(dp, tab[ch].s, nc); 68 sp++; 69 nb--; 70 dp += nc; 71 nd -= nc; 72 } 73 74 /* all done */ 75 *dn = dp - ds; 76 return sp - ss; 77 }