github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/examples/gno.land/p/demo/json/escape.gno (about) 1 package json 2 3 import ( 4 "bytes" 5 "errors" 6 "unicode/utf8" 7 ) 8 9 const ( 10 supplementalPlanesOffset = 0x10000 11 highSurrogateOffset = 0xD800 12 lowSurrogateOffset = 0xDC00 13 surrogateEnd = 0xDFFF 14 basicMultilingualPlaneOffset = 0xFFFF 15 badHex = -1 16 ) 17 18 var hexLookupTable = [256]int{ 19 '0': 0x0, '1': 0x1, '2': 0x2, '3': 0x3, '4': 0x4, 20 '5': 0x5, '6': 0x6, '7': 0x7, '8': 0x8, '9': 0x9, 21 'A': 0xA, 'B': 0xB, 'C': 0xC, 'D': 0xD, 'E': 0xE, 'F': 0xF, 22 'a': 0xA, 'b': 0xB, 'c': 0xC, 'd': 0xD, 'e': 0xE, 'f': 0xF, 23 // Fill unspecified index-value pairs with key and value of -1 24 'G': -1, 'H': -1, 'I': -1, 'J': -1, 25 'K': -1, 'L': -1, 'M': -1, 'N': -1, 26 'O': -1, 'P': -1, 'Q': -1, 'R': -1, 27 'S': -1, 'T': -1, 'U': -1, 'V': -1, 28 'W': -1, 'X': -1, 'Y': -1, 'Z': -1, 29 'g': -1, 'h': -1, 'i': -1, 'j': -1, 30 'k': -1, 'l': -1, 'm': -1, 'n': -1, 31 'o': -1, 'p': -1, 'q': -1, 'r': -1, 32 's': -1, 't': -1, 'u': -1, 'v': -1, 33 'w': -1, 'x': -1, 'y': -1, 'z': -1, 34 } 35 36 func h2i(c byte) int { 37 return hexLookupTable[c] 38 } 39 40 // Unescape takes an input byte slice, processes it to Unescape certain characters, 41 // and writes the result into an output byte slice. 42 // 43 // it returns the processed slice and any error encountered during the Unescape operation. 44 func Unescape(input, output []byte) ([]byte, error) { 45 // find the index of the first backslash in the input slice. 46 firstBackslash := bytes.IndexByte(input, backSlash) 47 if firstBackslash == -1 { 48 return input, nil 49 } 50 51 // ensure the output slice has enough capacity to hold the result. 52 inputLen := len(input) 53 if cap(output) < inputLen { 54 output = make([]byte, inputLen) 55 } 56 57 output = output[:inputLen] 58 copy(output, input[:firstBackslash]) 59 60 input = input[firstBackslash:] 61 buf := output[firstBackslash:] 62 63 for len(input) > 0 { 64 inLen, bufLen, err := processEscapedUTF8(input, buf) 65 if err != nil { 66 return nil, err 67 } 68 69 input = input[inLen:] // the number of bytes consumed in the input 70 buf = buf[bufLen:] // the number of bytes written to buf 71 72 // find the next backslash in the remaining input 73 nextBackslash := bytes.IndexByte(input, backSlash) 74 if nextBackslash == -1 { 75 copy(buf, input) 76 buf = buf[len(input):] 77 break 78 } 79 80 copy(buf, input[:nextBackslash]) 81 82 input = input[nextBackslash:] 83 buf = buf[nextBackslash:] 84 } 85 86 return output[:len(output)-len(buf)], nil 87 } 88 89 // isSurrogatePair returns true if the rune is a surrogate pair. 90 // 91 // A surrogate pairs are used in UTF-16 encoding to encode characters 92 // outside the Basic Multilingual Plane (BMP). 93 func isSurrogatePair(r rune) bool { 94 return highSurrogateOffset <= r && r <= surrogateEnd 95 } 96 97 // combineSurrogates reconstruct the original unicode code points in the 98 // supplemental plane by combinin the high and low surrogate. 99 // 100 // The hight surrogate in the range from U+D800 to U+DBFF, 101 // and the low surrogate in the range from U+DC00 to U+DFFF. 102 // 103 // The formula to combine the surrogates is: 104 // (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000 105 func combineSurrogates(high, low rune) rune { 106 return ((high - highSurrogateOffset) << 10) + (low - lowSurrogateOffset) + supplementalPlanesOffset 107 } 108 109 // deocdeSingleUnicodeEscape decodes a unicode escape sequence (e.g., \uXXXX) into a rune. 110 func decodeSingleUnicodeEscape(b []byte) (rune, bool) { 111 if len(b) < 6 { 112 return utf8.RuneError, false 113 } 114 115 // convert hex to decimal 116 h1, h2, h3, h4 := h2i(b[2]), h2i(b[3]), h2i(b[4]), h2i(b[5]) 117 if h1 == badHex || h2 == badHex || h3 == badHex || h4 == badHex { 118 return utf8.RuneError, false 119 } 120 121 return rune(h1<<12 + h2<<8 + h3<<4 + h4), true 122 } 123 124 // decodeUnicodeEscape decodes a Unicode escape sequence from a byte slice. 125 func decodeUnicodeEscape(b []byte) (rune, int) { 126 r, ok := decodeSingleUnicodeEscape(b) 127 if !ok { 128 return utf8.RuneError, -1 129 } 130 131 // determine valid unicode escapes within the BMP 132 if r <= basicMultilingualPlaneOffset && !isSurrogatePair(r) { 133 return r, 6 134 } 135 136 // Decode the following escape sequence to verify a UTF-16 susergate pair. 137 r2, ok := decodeSingleUnicodeEscape(b[6:]) 138 if !ok { 139 return utf8.RuneError, -1 140 } 141 142 if r2 < lowSurrogateOffset { 143 return utf8.RuneError, -1 144 } 145 146 return combineSurrogates(r, r2), 12 147 } 148 149 var escapeByteSet = [256]byte{ 150 '"': doubleQuote, 151 '\\': backSlash, 152 '/': slash, 153 'b': backSpace, 154 'f': formFeed, 155 'n': newLine, 156 'r': carriageReturn, 157 't': tab, 158 } 159 160 // Unquote takes a byte slice and unquotes it by removing 161 // the surrounding quotes and unescaping the contents. 162 func Unquote(s []byte, border byte) (string, bool) { 163 s, ok := unquoteBytes(s, border) 164 return string(s), ok 165 } 166 167 // unquoteBytes takes a byte slice and unquotes it by removing 168 // TODO: consider to move this function to the strconv package. 169 func unquoteBytes(s []byte, border byte) ([]byte, bool) { 170 if len(s) < 2 || s[0] != border || s[len(s)-1] != border { 171 return nil, false 172 } 173 174 s = s[1 : len(s)-1] 175 176 r := 0 177 for r < len(s) { 178 c := s[r] 179 180 if c == backSlash || c == border || c < 0x20 { 181 break 182 } 183 184 if c < utf8.RuneSelf { 185 r++ 186 continue 187 } 188 189 rr, size := utf8.DecodeRune(s[r:]) 190 if rr == utf8.RuneError && size == 1 { 191 break 192 } 193 194 r += size 195 } 196 197 if r == len(s) { 198 return s, true 199 } 200 201 utfDoubleMax := utf8.UTFMax * 2 202 b := make([]byte, len(s)+utfDoubleMax) 203 w := copy(b, s[0:r]) 204 205 for r < len(s) { 206 if w >= len(b)-utf8.UTFMax { 207 nb := make([]byte, utfDoubleMax+(2*len(b))) 208 copy(nb, b) 209 b = nb 210 } 211 212 c := s[r] 213 if c == backSlash { 214 r++ 215 if r >= len(s) { 216 return nil, false 217 } 218 219 if s[r] == 'u' { 220 rr, res := decodeUnicodeEscape(s[r-1:]) 221 if res < 0 { 222 return nil, false 223 } 224 225 w += utf8.EncodeRune(b[w:], rr) 226 r += 5 227 } else { 228 decode := escapeByteSet[s[r]] 229 if decode == 0 { 230 return nil, false 231 } 232 233 if decode == doubleQuote || decode == backSlash || decode == slash { 234 decode = s[r] 235 } 236 237 b[w] = decode 238 r++ 239 w++ 240 } 241 } else if c == border || c < 0x20 { 242 return nil, false 243 } else if c < utf8.RuneSelf { 244 b[w] = c 245 r++ 246 w++ 247 } else { 248 rr, size := utf8.DecodeRune(s[r:]) 249 250 if rr == utf8.RuneError && size == 1 { 251 return nil, false 252 } 253 254 r += size 255 w += utf8.EncodeRune(b[w:], rr) 256 } 257 } 258 259 return b[:w], true 260 } 261 262 // processEscapedUTF8 processes the escape sequence in the given byte slice and 263 // and converts them to UTF-8 characters. The function returns the length of the processed input and output. 264 // 265 // The input 'in' must contain the escape sequence to be processed, 266 // and 'out' provides a space to store the converted characters. 267 // 268 // The function returns (input length, output length) if the escape sequence is correct. 269 // Unicode escape sequences (e.g. \uXXXX) are decoded to UTF-8, other default escape sequences are 270 // converted to their corresponding special characters (e.g. \n -> newline). 271 // 272 // If the escape sequence is invalid, or if 'in' does not completely enclose the escape sequence, 273 // function returns (-1, -1) to indicate an error. 274 func processEscapedUTF8(in, out []byte) (int, int, error) { 275 if len(in) < 2 || in[0] != backSlash { 276 return -1, -1, errors.New("invalid escape sequence") 277 } 278 279 escapeSeqLen := 2 280 escapeChar := in[1] 281 282 if escapeChar != 'u' { 283 val := escapeByteSet[escapeChar] 284 if val == 0 { 285 return -1, -1, errors.New("invalid escape sequence") 286 } 287 288 out[0] = val 289 return escapeSeqLen, 1, nil 290 } 291 292 r, size := decodeUnicodeEscape(in) 293 if size == -1 { 294 return -1, -1, errors.New("invalid escape sequence") 295 } 296 297 outLen := utf8.EncodeRune(out, r) 298 299 return size, outLen, nil 300 }