github.com/cloudwego/kitex@v0.9.0/pkg/utils/json.go (about) 1 /* 2 * MIT License 3 * 4 * Copyright (c) 2016 json-iterator 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 * The source code of this file is written based on json-iterator, 25 * all modifications are Copyright 2021 CloudWeGo Authors. 26 */ 27 28 package utils 29 30 import ( 31 "errors" 32 "fmt" 33 "strings" 34 "unicode/utf16" 35 "unicode/utf8" 36 "unsafe" 37 ) 38 39 // const of json keyword char 40 const ( 41 EmptyJSON = "{}" 42 Comma = ',' 43 Colon = ':' 44 DQuotation = '"' 45 LeftBrace = '{' 46 RightBrace = '}' 47 ) 48 49 const ( 50 t1 = 0x00 // 0000 0000 51 tx = 0x80 // 1000 0000 52 t2 = 0xC0 // 1100 0000 53 t3 = 0xE0 // 1110 0000 54 t4 = 0xF0 // 1111 0000 55 t5 = 0xF8 // 1111 1000 56 57 maskx = 0x3F // 0011 1111 58 59 rune1Max = 1<<7 - 1 60 rune2Max = 1<<11 - 1 61 rune3Max = 1<<16 - 1 62 63 surrogateMin = 0xD800 64 surrogateMax = 0xDFFF 65 66 maxRune = '\U0010FFFF' // Maximum valid Unicode code point. 67 runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character" 68 69 hex = "0123456789abcdef" 70 ) 71 72 // Map2JSONStr transform map[string]string to json str, perf is better than use json lib directly 73 func Map2JSONStr(mapInfo map[string]string) (str string, err error) { 74 defer func() { 75 if r := recover(); r != nil { 76 if e, ok := r.(error); ok { 77 err = fmt.Errorf("Map2JSONStr panic: %w", e) 78 } else { 79 err = fmt.Errorf("Map2JSONStr panic: %+v", r) 80 } 81 } 82 }() 83 size := len(mapInfo) 84 if mapInfo == nil || size == 0 { 85 return EmptyJSON, nil 86 } 87 // calculate actual byte size that avoid allocate mem multi times 88 idx := 0 89 byteSize := 2 90 for k, v := range mapInfo { 91 byteSize += len(k) + len(v) + 5 92 if idx++; idx < size { 93 byteSize++ 94 } 95 } 96 var strBuilder strings.Builder 97 strBuilder.Grow(byteSize) 98 strBuilder.WriteByte(LeftBrace) 99 idx = 0 100 for k, v := range mapInfo { 101 wrapStrWithQuotation(k, &strBuilder) 102 strBuilder.WriteByte(Colon) 103 wrapStrWithQuotation(v, &strBuilder) 104 if idx++; idx < size { 105 strBuilder.WriteByte(Comma) 106 } 107 } 108 strBuilder.WriteByte(RightBrace) 109 return strBuilder.String(), nil 110 } 111 112 // JSONStr2Map transform json str to map[string]string, perf is better than use json lib directly 113 func JSONStr2Map(jsonStr string) (mapInfo map[string]string, err error) { 114 defer func() { 115 if r := recover(); r != nil { 116 if e, ok := r.(error); ok { 117 err = fmt.Errorf("JSONStr2Map panic: %w", e) 118 } else { 119 err = fmt.Errorf("JSONStr2Map panic: %+v", r) 120 } 121 } 122 }() 123 data := []byte(jsonStr) 124 size := len(data) 125 lastIdx := size - 1 126 idx := 0 127 var c byte 128 if c, idx, err = nextToken(data, idx, lastIdx); err != nil { 129 return 130 } 131 var isNull bool 132 if idx, isNull = checkNull(c, data, idx, lastIdx); isNull { 133 return 134 } 135 if c != LeftBrace || data[size-1] != RightBrace { 136 err = fmt.Errorf("json str is invalid") 137 return 138 } 139 if ch, _, _ := nextToken(data, idx, lastIdx); ch == RightBrace { 140 return 141 } 142 143 mapInfo = make(map[string]string) 144 for ; c == Comma || c == LeftBrace; c, idx, err = nextToken(data, idx, lastIdx) { 145 if err != nil { 146 err = fmt.Errorf("json str is invalid") 147 return 148 } 149 var key, val string 150 if key, idx, err = readString(data, idx, lastIdx); err != nil { 151 return 152 } 153 if c, idx, err = nextToken(data, idx, lastIdx); c != ':' || err != nil { 154 err = fmt.Errorf("json str is invalid, expect ':' after object field, but found %s", string(c)) 155 return 156 } 157 if val, idx, err = readString(data, idx, lastIdx); err != nil { 158 return 159 } 160 mapInfo[key] = val 161 } 162 return mapInfo, err 163 } 164 165 func readString(buf []byte, idx, lastIdx int) (string, int, error) { 166 var err error 167 var c byte 168 var isNull bool 169 if c, idx, err = nextToken(buf, idx, lastIdx); err != nil { 170 return "", idx, err 171 } 172 var str []byte 173 if c == '"' { 174 start := idx 175 noESC := true 176 for idx <= lastIdx { 177 if c, idx, err = readByte(buf, idx, lastIdx); err != nil { 178 return "", idx, err 179 } 180 switch c { 181 case '"': 182 if start < idx-1 { 183 if noESC { 184 str = buf[start : idx-1] 185 } else { 186 str = append(str, buf[start:idx-1]...) 187 } 188 } 189 return *(*string)(unsafe.Pointer(&str)), idx, nil 190 case '\\': 191 if start < idx-1 { 192 if noESC { 193 str = buf[start : idx-1] 194 } else { 195 str = append(str, buf[start:idx-1]...) 196 } 197 } 198 if c, idx, err = readByte(buf, idx, lastIdx); err != nil { 199 return "", idx, err 200 } 201 if str, idx, err = readEscapedChar(c, buf, idx, str, lastIdx); err != nil { 202 return "", 0, err 203 } 204 start = idx 205 noESC = false 206 } 207 } 208 } else if idx, isNull = checkNull(c, buf, idx, lastIdx); isNull { 209 return "", idx, nil 210 } 211 err = fmt.Errorf("json str is invalid, expects '\"' or n, but found %s", string(c)) 212 return *(*string)(unsafe.Pointer(&str)), idx, err 213 } 214 215 func readByte(buf []byte, idx, lastIdx int) (byte, int, error) { 216 if lastIdx < idx { 217 return 0, -1, fmt.Errorf("readByte no more data") 218 } 219 c := buf[idx] 220 idx++ 221 return c, idx, nil 222 } 223 224 func nextToken(buf []byte, idx, lastIdx int) (byte, int, error) { 225 if lastIdx < idx { 226 return 0, -1, errors.New("nextToken no more data") 227 } 228 var c byte 229 for idx <= lastIdx { 230 c = buf[idx] 231 idx++ 232 switch c { 233 case ' ', '\n', '\t', '\r': 234 continue 235 } 236 return c, idx, nil 237 } 238 return c, idx, nil 239 } 240 241 func checkNull(c byte, data []byte, idx, lastIdx int) (int, bool) { 242 if c == 'n' { 243 ch, idx, _ := readByte(data, idx, lastIdx) 244 if ch != 'u' { 245 idx-- 246 return idx, false 247 } 248 ch, idx, _ = readByte(data, idx, lastIdx) 249 if ch != 'l' { 250 idx-- 251 return idx, false 252 } 253 ch, idx, _ = readByte(data, idx, lastIdx) 254 if ch != 'l' { 255 idx-- 256 return idx, false 257 } 258 return idx, true 259 } 260 return idx, false 261 } 262 263 func readU4(buf []byte, idx, lastIdx int) (rune, int, error) { 264 var err error 265 var ret rune 266 for i := 0; i < 4; i++ { 267 var c byte 268 if c, idx, err = readByte(buf, idx, lastIdx); err != nil { 269 return ret, idx, err 270 } 271 if c >= '0' && c <= '9' { 272 ret = ret*16 + rune(c-'0') 273 } else if c >= 'a' && c <= 'f' { 274 ret = ret*16 + rune(c-'a'+10) 275 } else if c >= 'A' && c <= 'F' { 276 ret = ret*16 + rune(c-'A'+10) 277 } else { 278 return ret, idx, fmt.Errorf("unicode invalid: expects 0~9 or a~f, but found %v", string([]byte{c})) 279 } 280 } 281 return ret, idx, nil 282 } 283 284 // refer to json-iterator/go/iter_str readEscapedChar 285 func readEscapedChar(c byte, buf []byte, idx int, str []byte, lastIdx int) ([]byte, int, error) { 286 var err error 287 switch c { 288 case 'u': 289 var r rune 290 if r, idx, err = readU4(buf, idx, lastIdx); err != nil { 291 return str, idx, err 292 } 293 // 是否是扩展字符 294 if utf16.IsSurrogate(r) { 295 if c, idx, err = readByte(buf, idx, lastIdx); err != nil { 296 return str, idx, err 297 } 298 if c != '\\' { 299 idx-- 300 str = appendRune(str, r) 301 return str, idx, nil 302 } 303 if c, idx, err = readByte(buf, idx, lastIdx); err != nil { 304 return str, idx, err 305 } 306 if c != 'u' { 307 str = appendRune(str, r) 308 return readEscapedChar(c, buf, idx, str, lastIdx) 309 } 310 var r2 rune 311 if r2, idx, err = readU4(buf, idx, lastIdx); err != nil { 312 return str, idx, err 313 } 314 combined := utf16.DecodeRune(r, r2) 315 if combined == '\uFFFD' { 316 str = appendRune(str, r) 317 str = appendRune(str, r2) 318 } else { 319 str = appendRune(str, combined) 320 } 321 } else { 322 str = appendRune(str, r) 323 } 324 case '"': 325 str = append(str, '"') 326 case '\\': 327 str = append(str, '\\') 328 case '/': 329 str = append(str, '/') 330 case 'b': 331 str = append(str, '\b') 332 case 'f': 333 str = append(str, '\f') 334 case 'n': 335 str = append(str, '\n') 336 case 'r': 337 str = append(str, '\r') 338 case 't': 339 str = append(str, '\t') 340 default: 341 return str, idx, errors.New("invalid escape char after \\") 342 } 343 return str, idx, nil 344 } 345 346 // refer to json-iterator/go/stream_str writeStringSlowPath 347 func wrapStrWithQuotation(s string, strBuilder *strings.Builder) { 348 strBuilder.WriteByte(DQuotation) 349 valLen := len(s) 350 i := 0 351 start := i 352 for i < valLen { 353 c := s[i] 354 if c < utf8.RuneSelf && htmlSafeSet[c] { 355 i++ 356 continue 357 } else { 358 if b := s[i]; b < utf8.RuneSelf { 359 if start < i { 360 strBuilder.WriteString(s[start:i]) 361 } 362 switch b { 363 case '\\', '"': 364 strBuilder.WriteByte('\\') 365 strBuilder.WriteByte(b) 366 case '\n': 367 strBuilder.WriteByte('\\') 368 strBuilder.WriteByte('n') 369 case '\r': 370 strBuilder.WriteByte('\\') 371 strBuilder.WriteByte('r') 372 case '\t': 373 strBuilder.WriteByte('\\') 374 strBuilder.WriteByte('t') 375 default: 376 // This encodes bytes < 0x20 except for \t, \n and \r. 377 // If escapeHTML is set, it also escapes <, >, and & 378 // because they can lead to security holes when 379 // user-controlled strings are rendered into JSON 380 // and served to some browsers. 381 strBuilder.WriteString(`\u00`) 382 strBuilder.WriteByte(hex[b>>4]) 383 strBuilder.WriteByte(hex[b&0xF]) 384 } 385 i++ 386 start = i 387 continue 388 } 389 c, size := utf8.DecodeRuneInString(s[i:]) 390 if c == utf8.RuneError && size == 1 { 391 if start < i { 392 strBuilder.WriteString(s[start:i]) 393 } 394 strBuilder.WriteString(`\ufffd`) 395 i++ 396 start = i 397 continue 398 } 399 // U+2028 is LINE SEPARATOR. 400 // U+2029 is PARAGRAPH SEPARATOR. 401 // They are both technically valid characters in JSON strings, 402 // but don't work in JSONP, which has to be evaluated as JavaScript, 403 // and can lead to security holes there. It is valid JSON to 404 // escape them, so we do so unconditionally. 405 // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. 406 if c == '\u2028' || c == '\u2029' { 407 if start < i { 408 strBuilder.WriteString(s[start:i]) 409 } 410 strBuilder.WriteString(`\u202`) 411 strBuilder.WriteByte(hex[c&0xF]) 412 i += size 413 start = i 414 continue 415 } 416 i += size 417 } 418 } 419 if start < valLen { 420 strBuilder.WriteString(s[start:]) 421 } 422 strBuilder.WriteByte(DQuotation) 423 } 424 425 // refer to json-iterator/go/iter_str appendRune 426 func appendRune(p []byte, r rune) []byte { 427 // Negative values are erroneous. Making it unsigned addresses the problem. 428 switch i := uint32(r); { 429 case i <= rune1Max: 430 p = append(p, byte(r)) 431 return p 432 case i <= rune2Max: 433 p = append(p, t2|byte(r>>6)) 434 p = append(p, tx|byte(r)&maskx) 435 return p 436 case i > maxRune, surrogateMin <= i && i <= surrogateMax: 437 r = runeError 438 fallthrough 439 case i <= rune3Max: 440 p = append(p, t3|byte(r>>12)) 441 p = append(p, tx|byte(r>>6)&maskx) 442 p = append(p, tx|byte(r)&maskx) 443 return p 444 default: 445 p = append(p, t4|byte(r>>18)) 446 p = append(p, tx|byte(r>>12)&maskx) 447 p = append(p, tx|byte(r>>6)&maskx) 448 p = append(p, tx|byte(r)&maskx) 449 return p 450 } 451 } 452 453 var htmlSafeSet = [utf8.RuneSelf]bool{ 454 ' ': true, 455 '!': true, 456 '"': false, 457 '#': true, 458 '$': true, 459 '%': true, 460 '&': false, 461 '\'': true, 462 '(': true, 463 ')': true, 464 '*': true, 465 '+': true, 466 ',': true, 467 '-': true, 468 '.': true, 469 '/': true, 470 '0': true, 471 '1': true, 472 '2': true, 473 '3': true, 474 '4': true, 475 '5': true, 476 '6': true, 477 '7': true, 478 '8': true, 479 '9': true, 480 ':': true, 481 ';': true, 482 '<': false, 483 '=': true, 484 '>': false, 485 '?': true, 486 '@': true, 487 'A': true, 488 'B': true, 489 'C': true, 490 'D': true, 491 'E': true, 492 'F': true, 493 'G': true, 494 'H': true, 495 'I': true, 496 'J': true, 497 'K': true, 498 'L': true, 499 'M': true, 500 'N': true, 501 'O': true, 502 'P': true, 503 'Q': true, 504 'R': true, 505 'S': true, 506 'T': true, 507 'U': true, 508 'V': true, 509 'W': true, 510 'X': true, 511 'Y': true, 512 'Z': true, 513 '[': true, 514 '\\': false, 515 ']': true, 516 '^': true, 517 '_': true, 518 '`': true, 519 'a': true, 520 'b': true, 521 'c': true, 522 'd': true, 523 'e': true, 524 'f': true, 525 'g': true, 526 'h': true, 527 'i': true, 528 'j': true, 529 'k': true, 530 'l': true, 531 'm': true, 532 'n': true, 533 'o': true, 534 'p': true, 535 'q': true, 536 'r': true, 537 's': true, 538 't': true, 539 'u': true, 540 'v': true, 541 'w': true, 542 'x': true, 543 'y': true, 544 'z': true, 545 '{': true, 546 '|': true, 547 '}': true, 548 '~': true, 549 '\u007f': true, 550 }