github.com/3JoB/go-json@v0.10.4/internal/decoder/string.go (about) 1 package decoder 2 3 import ( 4 "fmt" 5 "unicode/utf8" 6 "unsafe" 7 8 "github.com/3JoB/go-reflect" 9 10 "github.com/3JoB/go-json/internal/errors" 11 ) 12 13 type stringDecoder struct { 14 structName string 15 fieldName string 16 } 17 18 func newStringDecoder(structName, fieldName string) *stringDecoder { 19 return &stringDecoder{ 20 structName: structName, 21 fieldName: fieldName, 22 } 23 } 24 25 func (d *stringDecoder) errUnmarshalType(typeName string, offset int64) *errors.UnmarshalTypeError { 26 return &errors.UnmarshalTypeError{ 27 Value: typeName, 28 Type: reflect.TypeOf(""), 29 Offset: offset, 30 Struct: d.structName, 31 Field: d.fieldName, 32 } 33 } 34 35 func (d *stringDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { 36 bytes, err := d.decodeStreamByte(s) 37 if err != nil { 38 return err 39 } 40 if bytes == nil { 41 return nil 42 } 43 **(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes)) 44 s.reset() 45 return nil 46 } 47 48 func (d *stringDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { 49 bytes, c, err := d.decodeByte(ctx.Buf, cursor) 50 if err != nil { 51 return 0, err 52 } 53 if bytes == nil { 54 return c, nil 55 } 56 cursor = c 57 **(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes)) 58 return cursor, nil 59 } 60 61 func (d *stringDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { 62 bytes, c, err := d.decodeByte(ctx.Buf, cursor) 63 if err != nil { 64 return nil, 0, err 65 } 66 if bytes == nil { 67 return [][]byte{nullbytes}, c, nil 68 } 69 return [][]byte{bytes}, c, nil 70 } 71 72 var ( 73 hexToInt = [256]int{ 74 '0': 0, 75 '1': 1, 76 '2': 2, 77 '3': 3, 78 '4': 4, 79 '5': 5, 80 '6': 6, 81 '7': 7, 82 '8': 8, 83 '9': 9, 84 'A': 10, 85 'B': 11, 86 'C': 12, 87 'D': 13, 88 'E': 14, 89 'F': 15, 90 'a': 10, 91 'b': 11, 92 'c': 12, 93 'd': 13, 94 'e': 14, 95 'f': 15, 96 } 97 ) 98 99 func unicodeToRune(code []byte) rune { 100 var r rune 101 for i := 0; i < len(code); i++ { 102 r = r*16 + rune(hexToInt[code[i]]) 103 } 104 return r 105 } 106 107 var isHex = [256]int8{ 108 '0': 1, 109 '1': 1, 110 '2': 2, 111 '3': 3, 112 '4': 4, 113 '5': 5, 114 '6': 6, 115 '7': 7, 116 '8': 8, 117 '9': 9, 118 'A': 10, 119 'B': 11, 120 'C': 12, 121 'D': 13, 122 'E': 14, 123 'F': 15, 124 'a': 10, 125 'b': 11, 126 'c': 12, 127 'd': 13, 128 'e': 14, 129 'f': 15, 130 } 131 132 var utf8First = [256]uint8{ 133 0xC2: 0x02, 0xC3: 0x02, 0xC4: 0x02, 0xC5: 0x02, 0xC6: 0x02, 0xC7: 0x02, 0xC8: 0x02, 0xC9: 0x02, 0xCA: 0x02, 0xCB: 0x02, 0xCC: 0x02, 0xCD: 0x02, 0xCE: 0x02, 0xCF: 0x02, 0xD0: 0x02, 0xD1: 0x02, 0xD2: 0x02, 0xD3: 0x02, 0xD4: 0x02, 0xD5: 0x02, 0xD6: 0x02, 0xD7: 0x02, 0xD8: 0x02, 0xD9: 0x02, 0xDA: 0x02, 0xDB: 0x02, 0xDC: 0x02, 0xDD: 0x02, 0xDE: 0x02, 0xDF: 0x02, 134 0xE0: 0x13, 135 0xE1: 0x03, 0xE2: 0x03, 0xE3: 0x03, 0xE4: 0x03, 0xE5: 0x03, 0xE6: 0x03, 0xE7: 0x03, 0xE8: 0x03, 0xE9: 0x03, 0xEA: 0x03, 0xEB: 0x03, 0xEC: 0x03, 0xEE: 0x03, 0xEF: 0x3, 136 0xED: 0x23, 137 0xF0: 0x34, 138 0xF1: 0x04, 0xF2: 0x04, 0xF3: 0x04, 139 0xF4: 0x44, 140 } 141 142 var utf8AcceptRanges = [16]struct{ lo, hi uint8 }{ 143 0: {lo: 0x80, hi: 0xBF}, 144 1: {lo: 0xA0, hi: 0xBF}, 145 2: {lo: 0x80, hi: 0x9F}, 146 3: {lo: 0x90, hi: 0xBF}, 147 4: {lo: 0x80, hi: 0x8F}, 148 } 149 150 var unescapeMap = [256]byte{ 151 '"': '"', 152 '\\': '\\', 153 '/': '/', 154 'b': '\b', 155 'f': '\f', 156 'n': '\n', 157 'r': '\r', 158 't': '\t', 159 'u': 'u', 160 } 161 162 const ( 163 inStringInvalidUTF8 = 0 164 inStringASCII = 1 165 inStringSentinel = 2 166 inStringStartEscape = 3 167 inStringEnd = 4 168 inStringStartMB = 5 169 ) 170 171 var inStringTypes [256]uint8 172 173 func init() { 174 for i := range inStringTypes { 175 inStringTypes[i] = inStringInvalidUTF8 176 } 177 for i := 0; i < 0x80; i++ { 178 inStringTypes[i] = inStringASCII 179 } 180 inStringTypes[nul] = inStringSentinel 181 inStringTypes['\\'] = inStringStartEscape 182 inStringTypes['"'] = inStringEnd 183 for i := 0xC2; i <= 0xF4; i++ { 184 inStringTypes[i] = inStringStartMB 185 } 186 } 187 188 func stringBytes(s *Stream) ([]byte, int64, error) { 189 _, cursor, p := s.stat() 190 cursor++ // skip double quote char 191 192 start := cursor 193 dst := cursor 194 inplace := true 195 first := int64(-1) 196 for { 197 c := char(p, cursor) 198 if t := inStringTypes[c]; t == inStringASCII { 199 cursor++ 200 dst++ 201 continue 202 } else if t == inStringStartMB { 203 x := utf8First[c] 204 sz := int64(x & 7) 205 if s.syncBufptr(s.requires(cursor, sz), &p) < 0 { 206 goto RuneError 207 } 208 accept := utf8AcceptRanges[x>>4] 209 c1 := char(p, cursor+1) 210 if c1 < accept.lo || accept.hi < c1 { 211 goto RuneError 212 } 213 if sz > 2 { 214 c2 := char(p, cursor+2) 215 if c2 < 0x80 || c2 > 0xBF { 216 goto RuneError 217 } 218 } 219 if sz > 3 { 220 c3 := char(p, cursor+3) 221 if c3 < 0x80 || c3 > 0xBF { 222 goto RuneError 223 } 224 } 225 cursor += sz 226 dst += sz 227 continue 228 } else if t == inStringStartEscape { 229 if first < 0 { 230 first = cursor 231 } 232 cursor++ 233 if s.syncBufptr(s.requires(cursor, 1), &p) < 0 { 234 goto ERROR 235 } 236 ec := char(p, cursor) 237 if unescapeMap[ec] == 0 { 238 return nil, cursor, errors.ErrInvalidCharacter(char(p, cursor), "in string escape code", cursor) 239 } 240 if ec != 'u' { 241 cursor++ 242 dst++ 243 continue 244 } 245 if s.syncBufptr(s.requires(cursor, 5), &p) < 0 { 246 goto ERROR 247 } 248 c1, c2, c3, c4 := char4(p, cursor+1) 249 if o := checkHex(c1, c2, c3, c4); o > 0 { 250 return nil, cursor + o, errors.ErrSyntax(fmt.Sprintf("json: invalid character %c in \\u hexadecimal character escape", char(p, cursor+o)), cursor+o) 251 } 252 r := decodeHexRune(c1, c2, c3, c4) 253 *ptrUint16(p, cursor+1) = uint16(r) 254 NextUnicode: 255 if r >= 0xD800 && r < 0xE000 { 256 const runeError = 65533 257 if s.syncBufptr(s.requires(cursor, 5+6), &p) >= 0 && char(p, cursor+5) == '\\' && char(p, cursor+6) == 'u' { 258 cursor2 := cursor + 6 259 c1, c2, c3, c4 := char4(p, cursor2+1) 260 if o := checkHex(c1, c2, c3, c4); o > 0 { 261 return nil, cursor2 + o, errors.ErrSyntax(fmt.Sprintf("json: invalid character %c in \\u hexadecimal character escape", char(p, cursor2+o)), cursor2+o) 262 } 263 r2 := decodeHexRune(c1, c2, c3, c4) 264 *ptrUint16(p, cursor2+1) = uint16(r2) 265 if r2 < 0xDC00 || r2 >= 0xE000 { 266 *ptrUint16(p, cursor+1) = runeError 267 dst += 3 268 cursor = cursor2 269 r = r2 270 goto NextUnicode 271 } 272 dst += 4 273 cursor = cursor2 + 5 274 } else { 275 *ptrUint16(p, cursor+1) = runeError 276 dst += 3 277 cursor += 5 278 } 279 } else { 280 cursor += 5 281 dst += runeLen(r) 282 } 283 continue 284 } else if t == inStringEnd { 285 if first < 0 { 286 return s.buf[start:cursor], cursor + 1, nil 287 } 288 if inplace { 289 src := unsafeAdd(p, int(first)) 290 unescapeString(src, src) 291 return s.buf[start:dst], cursor + 1, nil 292 } 293 src := unsafeAdd(p, int(start)) 294 b := make([]byte, dst-start+1) 295 data := (*sliceHeader)(unsafe.Pointer(&b)).data 296 unescapeString(src, data) 297 return b[:len(b)-1], cursor + 1, nil 298 } else if t == inStringSentinel { 299 if s.read() { 300 p = s.bufptr() 301 continue 302 } 303 goto ERROR 304 } 305 RuneError: 306 if first < 0 { 307 first = cursor 308 } 309 *(*byte)(unsafeAdd(p, int(cursor))) = nul 310 cursor++ 311 dst += 3 312 if cursor < dst { 313 inplace = false 314 } 315 } 316 ERROR: 317 return nil, s.length, errors.ErrUnexpectedEndOfJSON("string", s.offset+s.length) 318 } 319 320 func (d *stringDecoder) decodeStreamByte(s *Stream) ([]byte, error) { 321 for { 322 switch s.char() { 323 case ' ', '\n', '\t', '\r': 324 s.cursor++ 325 continue 326 case '[': 327 return nil, d.errUnmarshalType("array", s.totalOffset()) 328 case '{': 329 return nil, d.errUnmarshalType("object", s.totalOffset()) 330 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 331 return nil, d.errUnmarshalType("number", s.totalOffset()) 332 case '"': 333 b, cursor, err := stringBytes(s) 334 s.cursor = cursor 335 if err != nil { 336 return nil, err 337 } 338 return b, nil 339 case 'n': 340 if err := nullBytes(s); err != nil { 341 return nil, err 342 } 343 return nil, nil 344 case nul: 345 if s.read() { 346 continue 347 } 348 } 349 break 350 } 351 return nil, errors.ErrInvalidBeginningOfValue(s.char(), s.totalOffset()) 352 } 353 354 func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { 355 for { 356 switch buf[cursor] { 357 case ' ', '\n', '\t', '\r': 358 cursor++ 359 case '[': 360 return nil, cursor, d.errUnmarshalType("array", cursor) 361 case '{': 362 return nil, cursor, d.errUnmarshalType("object", cursor) 363 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 364 return nil, cursor, d.errUnmarshalType("number", cursor) 365 case '"': 366 s := bytesStream{buf: buf, length: int64(len(buf))} 367 cursor++ 368 p := (*sliceHeader)(unsafe.Pointer(&buf)).data 369 370 start := cursor 371 dst := cursor 372 inplace := true 373 first := int64(-1) 374 for { 375 c := char(p, cursor) 376 if t := inStringTypes[c]; t == inStringASCII { 377 cursor++ 378 dst++ 379 continue 380 } else if t == inStringStartMB { 381 x := utf8First[c] 382 sz := int64(x & 7) 383 if s.syncBufptr(s.requires(cursor, sz), &p) < 0 { 384 goto RuneError 385 } 386 accept := utf8AcceptRanges[x>>4] 387 c1 := char(p, cursor+1) 388 if c1 < accept.lo || accept.hi < c1 { 389 goto RuneError 390 } 391 if sz > 2 { 392 c2 := char(p, cursor+2) 393 if c2 < 0x80 || c2 > 0xBF { 394 goto RuneError 395 } 396 } 397 if sz > 3 { 398 c3 := char(p, cursor+3) 399 if c3 < 0x80 || c3 > 0xBF { 400 goto RuneError 401 } 402 } 403 cursor += sz 404 dst += sz 405 continue 406 } else if t == inStringStartEscape { 407 if first < 0 { 408 first = cursor 409 } 410 cursor++ 411 if s.syncBufptr(s.requires(cursor, 1), &p) < 0 { 412 goto ERROR 413 } 414 ec := char(p, cursor) 415 if unescapeMap[ec] == 0 { 416 return nil, cursor, errors.ErrInvalidCharacter(char(p, cursor), "in string escape code", cursor) 417 } 418 if ec != 'u' { 419 cursor++ 420 dst++ 421 continue 422 } 423 if s.syncBufptr(s.requires(cursor, 5), &p) < 0 { 424 goto ERROR 425 } 426 c1, c2, c3, c4 := char4(p, cursor+1) 427 if o := checkHex(c1, c2, c3, c4); o > 0 { 428 return nil, cursor + o, errors.ErrSyntax(fmt.Sprintf("json: invalid character %c in \\u hexadecimal character escape", char(p, cursor+o)), cursor+o) 429 } 430 r := decodeHexRune(c1, c2, c3, c4) 431 *ptrUint16(p, cursor+1) = uint16(r) 432 NextUnicode: 433 if r >= 0xD800 && r < 0xE000 { 434 const runeError = 65533 435 if s.syncBufptr(s.requires(cursor, 5+6), &p) >= 0 && char(p, cursor+5) == '\\' && char(p, cursor+6) == 'u' { 436 cursor2 := cursor + 6 437 c1, c2, c3, c4 := char4(p, cursor2+1) 438 if o := checkHex(c1, c2, c3, c4); o > 0 { 439 return nil, cursor2 + o, errors.ErrSyntax(fmt.Sprintf("json: invalid character %c in \\u hexadecimal character escape", char(p, cursor2+o)), cursor2+o) 440 } 441 r2 := decodeHexRune(c1, c2, c3, c4) 442 *ptrUint16(p, cursor2+1) = uint16(r2) 443 if r2 < 0xDC00 || r2 >= 0xE000 { 444 *ptrUint16(p, cursor+1) = runeError 445 dst += 3 446 cursor = cursor2 447 r = r2 448 goto NextUnicode 449 } 450 dst += 4 451 cursor = cursor2 + 5 452 } else { 453 *ptrUint16(p, cursor+1) = runeError 454 dst += 3 455 cursor += 5 456 } 457 } else { 458 cursor += 5 459 dst += runeLen(r) 460 } 461 continue 462 } else if t == inStringEnd { 463 if first < 0 { 464 return s.buf[start:cursor], cursor + 1, nil 465 } 466 if inplace { 467 src := unsafeAdd(p, int(first)) 468 unescapeString(src, src) 469 return s.buf[start:dst], cursor + 1, nil 470 } 471 src := unsafeAdd(p, int(start)) 472 b := make([]byte, dst-start+1) 473 data := (*sliceHeader)(unsafe.Pointer(&b)).data 474 unescapeString(src, data) 475 return b[:len(b)-1], cursor + 1, nil 476 } else if t == inStringSentinel { 477 if s.read() { 478 p = s.bufptr() 479 continue 480 } 481 goto ERROR 482 } 483 RuneError: 484 if first < 0 { 485 first = cursor 486 } 487 *(*byte)(unsafeAdd(p, int(cursor))) = nul 488 cursor++ 489 dst += 3 490 if cursor < dst { 491 inplace = false 492 } 493 } 494 ERROR: 495 return nil, s.length, errors.ErrUnexpectedEndOfJSON("string", s.offset+s.length) 496 case nul: 497 return nil, cursor, errors.ErrUnexpectedEndOfJSON("string", cursor) 498 case 'n': 499 if err := validateNull(buf, cursor); err != nil { 500 return nil, cursor, err 501 } 502 return nil, cursor + 4, nil 503 default: 504 return nil, cursor, errors.ErrInvalidBeginningOfValue(buf[cursor], cursor) 505 } 506 } 507 } 508 509 func unsafeAdd(ptr unsafe.Pointer, offset int) unsafe.Pointer { 510 return unsafe.Pointer(uintptr(ptr) + uintptr(offset)) 511 } 512 513 func unescapeString(src, dst unsafe.Pointer) { 514 for { 515 c := char(src, 0) 516 switch c { 517 case '"': 518 return 519 case '\\': 520 escapeChar := char(src, 1) 521 if escapeChar != 'u' { 522 *(*byte)(dst) = unescapeMap[escapeChar] 523 src = unsafeAdd(src, 2) 524 dst = unsafeAdd(dst, 1) 525 } else { 526 code := rune(*ptrUint16(src, 2)) 527 if code >= 0xD800 && code < 0xDC00 { 528 lo := rune(*ptrUint16(src, 8)) 529 code = (code-0xD800)<<10 | (lo - 0xDC00) + 0x10000 530 src = unsafeAdd(src, 6) 531 } 532 var b [utf8.UTFMax]byte 533 n := utf8.EncodeRune(b[:], code) 534 switch n { 535 case 4: 536 *(*byte)(unsafeAdd(dst, 3)) = b[3] 537 fallthrough 538 case 3: 539 *(*byte)(unsafeAdd(dst, 2)) = b[2] 540 fallthrough 541 case 2: 542 *(*byte)(unsafeAdd(dst, 1)) = b[1] 543 fallthrough 544 case 1: 545 *(*byte)(unsafeAdd(dst, 0)) = b[0] 546 } 547 src = unsafeAdd(src, 6) 548 dst = unsafeAdd(dst, n) 549 } 550 case nul: 551 *(*byte)(unsafeAdd(dst, 0)) = 0xEF 552 *(*byte)(unsafeAdd(dst, 1)) = 0xBF 553 *(*byte)(unsafeAdd(dst, 2)) = 0xBD 554 src = unsafeAdd(src, 1) 555 dst = unsafeAdd(dst, 3) 556 default: 557 *(*byte)(dst) = c 558 src = unsafeAdd(src, 1) 559 dst = unsafeAdd(dst, 1) 560 } 561 } 562 } 563 564 func char4(p unsafe.Pointer, offset int64) (byte, byte, byte, byte) { 565 return char(p, offset), char(p, offset+1), char(p, offset+2), char(p, offset+3) 566 } 567 568 func checkHex(v1, v2, v3, v4 byte) int64 { 569 if isHex[v1] == 0 { 570 return 1 571 } 572 if isHex[v2] == 0 { 573 return 2 574 } 575 if isHex[v3] == 0 { 576 return 3 577 } 578 if isHex[v4] == 0 { 579 return 4 580 } 581 return 0 582 } 583 584 func decodeHexRune(v1, v2, v3, v4 byte) rune { 585 return rune(hexToInt[v1]<<12 | hexToInt[v2]<<8 | hexToInt[v3]<<4 | hexToInt[v4]) 586 } 587 588 func runeLen(r rune) int64 { 589 if r <= 127 { 590 return 1 591 } else if r <= 2047 { 592 return 2 593 } else { 594 return 3 595 } 596 } 597 598 type bytesStream struct { 599 buf []byte 600 length int64 601 offset int64 602 } 603 604 func (b *bytesStream) read() bool { 605 return false 606 } 607 608 func (b *bytesStream) requires(cursor, n int64) int { 609 if cursor+n >= b.length { 610 return -1 611 } 612 return 0 613 } 614 615 func (b *bytesStream) syncBufptr(r int, p *unsafe.Pointer) int { 616 return r 617 } 618 619 func (b *bytesStream) bufptr() unsafe.Pointer { 620 panic("unreachable") 621 }