// Source: github.com/lxt1045/json@v0.0.0-20231013032136-54d6b1d6e525/unmarshal.go

// MIT License
//
// Copyright (c) 2021 Xiantu Li
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
22 23 package json 24 25 import ( 26 "errors" 27 "math" 28 "reflect" 29 "strconv" 30 "strings" 31 "sync" 32 "unicode/utf16" 33 "unicode/utf8" 34 "unsafe" 35 36 lxterrs "github.com/lxt1045/errors" 37 ) 38 39 //go:noinline 40 func ErrStream(stream string) string { 41 if len(stream[:]) > 128 { 42 stream = stream[:128] 43 } 44 str := string(stream) 45 return str 46 } 47 48 var spaceTable = [256]bool{ 49 '\t': true, '\n': true, '\v': true, '\f': true, '\r': true, ' ': true, 0x85: true, 0xA0: true, 50 } 51 52 func trimSpace(stream string) (i int) { 53 // if !spaceTable[stream[0]] { 54 // return 55 // } 56 // l := len(stream) 57 // for ; spaceTable[stream[i]] && i < l; i++ { 58 for ; spaceTable[stream[i]]; i++ { 59 } 60 return 61 } 62 63 // 为了 inline 部分共用逻辑让调用者完成; 逻辑 解析:冒号 和 逗号 等单字符 64 // n 表示在空字符串中找到多少个 b 65 func parseByte(stream string, b byte) (i, n int) { 66 for ; ; i++ { 67 // for i = range []byte(stream) { 68 if stream[i] == b { 69 n++ 70 // continue 71 i++ 72 } 73 if !spaceTable[stream[i]] { 74 return 75 } 76 } 77 return 78 } 79 80 func parseObjToSlice(stream string, s []interface{}) (i int) { 81 return 0 82 } 83 84 // 解析 {} 85 // func parseObj(sts status, stream string, store PoolStore, tag *TagInfo) (i int) { 86 func parseObj1(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 87 iSlash = idxSlash 88 i += trimSpace(stream[i:]) 89 if stream[i] == '}' { 90 i++ 91 return 92 } 93 n, nB := 0, 0 94 key := "" 95 for { 96 // 手动内联 97 { 98 // start := i 99 // n = strings.IndexByte(stream[i+1:], '"') // 默认 stream[i] == '"', 不做检查 100 // if n >= 0 { 101 // i += n + 2 102 // key = stream[start:i] 103 // } 104 105 // key 长度比较短,采用 for 循环比 strings.IndexByte 有优势 106 i++ 107 start := i 108 for ; i < len(stream) && stream[i] != '"'; i++ { 109 } 110 key = stream[start:i] 111 i++ 112 } 113 // 解析 冒号 114 n, nB = parseByte(stream[i:], ':') 115 i += n 116 if nB != 1 { 117 panic(lxterrs.New(ErrStream(stream[i:]))) 118 } 119 son := store.tag.Children[string(key)] // map 查询时 
[]byte -> string 不需要内存分配 120 121 if son != nil { 122 storeSon := PoolStore{ 123 tag: son, 124 pointerPool: store.pointerPool, 125 obj: store.obj, 126 } 127 n, iSlash = son.fUnm(iSlash-i, storeSon, stream[i:]) 128 iSlash += i 129 } else { 130 n = parseEmpty(stream[i:]) 131 } 132 i += n 133 // // 解析 逗号 134 // n, nB = parseByte(stream[i:], ',') 135 // i += n 136 // if nB != 1 { 137 // if nB == 0 && '}' == stream[i] { 138 // i++ 139 // return 140 // } 141 // panic(lxterrs.New(ErrStream(stream[i:]))) 142 // } 143 if stream[i] == ',' && !spaceTable[stream[i+1]] { 144 i++ // 最常命中分支 145 } else if stream[i] == '}' { 146 i++ // 结束时命中分支 147 return 148 } else { 149 // 有空格时命中分支 150 n, nB = parseByte(stream[i:], ',') 151 i += n 152 if nB != 1 { 153 if nB == 0 && '}' == stream[i] { 154 i++ 155 return 156 } 157 panic(lxterrs.New(ErrStream(stream[i:]))) 158 } 159 } 160 } 161 } 162 func parseObj(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 163 iSlash = idxSlash 164 i += trimSpace(stream[i:]) 165 if stream[i] == '}' { 166 i++ 167 return 168 } 169 n, nB := 0, 0 170 // key := "" 171 for { 172 // 手动内联 173 i++ 174 // if store.tag.tireTree == nil { 175 // return 176 // } 177 // if stream[i:i+len("created_at")] == "created_at" { 178 // log.Println("test:", stream[i:i+len(`"created_at": "Mon Apr 26 06:01:55 +0000 2010"`)]) 179 // } 180 son := store.tag.tireTree.Get(stream[i:]) 181 if son != nil { 182 i += len(son.TagName) + 1 183 } else { 184 // start := i 185 // n = strings.IndexByte(stream[i+1:], '"') // 默认 stream[i] == '"', 不做检查 186 // if n >= 0 { 187 // i += n + 2 188 // key = stream[start:i] 189 // } 190 191 // key 长度比较短,采用 for 循环比 strings.IndexByte 有优势 192 // start := i 193 for ; i < len(stream) && stream[i] != '"'; i++ { 194 } 195 // key = stream[start:i] 196 i++ 197 } 198 199 // 解析 冒号 200 n, nB = parseByte(stream[i:], ':') 201 i += n 202 if nB != 1 { 203 panic(lxterrs.New(ErrStream(stream[i:]))) 204 } 205 // son := store.tag.Children[string(key)] // map 查询时 []byte 
-> string 不需要内存分配 206 207 if son != nil { 208 storeSon := PoolStore{ 209 tag: son, 210 pointerPool: store.pointerPool, 211 obj: store.obj, 212 } 213 n, iSlash = son.fUnm(iSlash-i, storeSon, stream[i:]) 214 iSlash += i 215 } else { 216 n = parseEmpty(stream[i:]) 217 } 218 i += n 219 // // 解析 逗号 220 // n, nB = parseByte(stream[i:], ',') 221 // i += n 222 // if nB != 1 { 223 // if nB == 0 && '}' == stream[i] { 224 // i++ 225 // return 226 // } 227 // panic(lxterrs.New(ErrStream(stream[i:]))) 228 // } 229 if stream[i] == ',' && !spaceTable[stream[i+1]] { 230 i++ // 最常命中分支 231 } else if stream[i] == '}' { 232 i++ // 结束时命中分支 233 return 234 } else { 235 // 有空格时命中分支 236 n, nB = parseByte(stream[i:], ',') 237 i += n 238 if nB != 1 { 239 if nB == 0 && '}' == stream[i] { 240 i++ 241 return 242 } 243 panic(lxterrs.New(ErrStream(stream[i:]))) 244 } 245 } 246 } 247 } 248 func parseMapInterface(idxSlash int, stream string) (m map[string]interface{}, i, iSlash int) { 249 iSlash = idxSlash 250 n, nB := 0, 0 251 key := "" 252 ppairs := pairPool.Get().(*[]pair) 253 pairs := *ppairs 254 for { 255 i += trimSpace(stream[i:]) 256 // 手动内联 257 { 258 // i++ 259 // n = strings.IndexByte(stream[i:], '"') 260 // if n >= 0 { 261 // n += i 262 // key = stream[i:n] 263 // i = n + 1 264 // } 265 266 i++ 267 n = i 268 for ; i < len(stream) && stream[i] != '"'; i++ { 269 } 270 key = stream[n:i] 271 i++ 272 } 273 n, nB = parseByte(stream[i:], ':') 274 if nB != 1 { 275 panic(lxterrs.New(ErrStream(stream[i:]))) 276 } 277 i += n 278 pairs = append(pairs, pair{ 279 k: key, 280 }) 281 n, iSlash = parseInterface(iSlash-i, stream[i:], &pairs[len(pairs)-1].v) 282 iSlash += i 283 i += n 284 // m[string(key)] = *value 285 n, nB = parseByte(stream[i:], ',') 286 i += n 287 if nB != 1 { 288 if nB == 0 && '}' == stream[i] { 289 i++ 290 291 // map 292 // m = make(map[string]interface{}, len(pairs)) 293 m = makeMapEface(len(pairs)) 294 295 for i := range pairs { 296 m[*(*string)(unsafe.Pointer(&pairs[i].k))] = 
pairs[i].v 297 } 298 *ppairs = pairs[:0] 299 pairPool.Put(ppairs) 300 return 301 } 302 panic(lxterrs.New(ErrStream(stream[i:]))) 303 } 304 } 305 } 306 307 // map[string]T 308 func parseMapValue(idxSlash int, stream string) (m map[string]interface{}, i, iSlash int) { 309 iSlash = idxSlash 310 n, nB := 0, 0 311 key := "" 312 ppairs := pairPool.Get().(*[]pair) 313 pairs := *ppairs 314 for { 315 i += trimSpace(stream[i:]) 316 // 手动内联 317 { 318 // i++ 319 // n = strings.IndexByte(stream[i:], '"') 320 // if n >= 0 { 321 // n += i 322 // key = stream[i:n] 323 // i = n + 1 324 // } 325 326 i++ 327 n = i 328 for ; i < len(stream) && stream[i] != '"'; i++ { 329 } 330 key = stream[n:i] 331 i++ 332 } 333 n, nB = parseByte(stream[i:], ':') 334 if nB != 1 { 335 panic(lxterrs.New(ErrStream(stream[i:]))) 336 } 337 i += n 338 pairs = append(pairs, pair{ 339 k: key, 340 }) 341 n, iSlash = parseInterface(iSlash-i, stream[i:], &pairs[len(pairs)-1].v) 342 iSlash += i 343 i += n 344 // m[string(key)] = *value 345 n, nB = parseByte(stream[i:], ',') 346 i += n 347 if nB != 1 { 348 if nB == 0 && '}' == stream[i] { 349 i++ 350 351 // map 352 // m = make(map[string]interface{}, len(pairs)) 353 m = makeMapEface(len(pairs)) 354 355 for i := range pairs { 356 m[*(*string)(unsafe.Pointer(&pairs[i].k))] = pairs[i].v 357 } 358 *ppairs = pairs[:0] 359 pairPool.Put(ppairs) 360 return 361 } 362 panic(lxterrs.New(ErrStream(stream[i:]))) 363 } 364 } 365 } 366 367 func parseSliceInterface(idxSlash int, stream string) (s []interface{}, i, iSlash int) { 368 iSlash = idxSlash 369 i = trimSpace(stream[i:]) 370 var value interface{} 371 s = poolSliceInterface.Get().([]interface{}) 372 for n, nB := 0, 0; ; { 373 n, iSlash = parseInterface(iSlash-i, stream[i:], &value) 374 iSlash += i 375 i += n 376 s = append(s, value) 377 n, nB = parseByte(stream[i:], ',') 378 i += n 379 if nB != 1 { 380 if nB == 0 && ']' == stream[i] { 381 i++ 382 if cap(s)-len(s) > 4 { 383 sLeft := s[len(s):] 384 
poolSliceInterface.Put(sLeft) 385 s = s[:len(s):len(s)] 386 } 387 return 388 } 389 panic(lxterrs.New(ErrStream(stream[i:]))) 390 } 391 } 392 } 393 394 //parseSlice 可以细化一下,每个类型来一个,速度可以加快 395 func parseSlice2(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 396 iSlash = idxSlash 397 i = trimSpace(stream) 398 if stream[i] == ']' { 399 i++ 400 pHeader := (*SliceHeader)(store.obj) 401 pHeader.Data = store.obj 402 return 403 } 404 son := store.tag.ChildList[0] 405 size := son.TypeSize 406 tag := store.tag 407 uint8s := tag.SPool.Get().(*[]uint8) // cpu %12; parseSlice, cpu 20% 408 pHeader := (*SliceHeader)(store.obj) 409 bases := (*[]uint8)(store.obj) 410 SPoolN, BaseType := store.tag.SPoolN, store.tag.BaseType 411 store.tag = son 412 for n, nB := 0, 0; ; { 413 if len(*uint8s)+size > cap(*uint8s) { 414 l := cap(*uint8s) / size 415 c := l * 2 416 if c < int(SPoolN) { 417 c = int(SPoolN) 418 } 419 v := reflect.MakeSlice(BaseType, 0, c) 420 p := reflectValueToPointer(&v) 421 news := (*[]uint8)(p) 422 423 pH := (*SliceHeader)(p) 424 pH.Cap = pH.Cap * size 425 // copy(*news, *uint8s) 426 // *uint8s = *news 427 *uint8s = append((*news)[:0], *uint8s...) 
428 } 429 430 l := len(*uint8s) 431 *uint8s = (*uint8s)[:l+size] 432 433 p := unsafe.Pointer(&(*uint8s)[l]) 434 store.obj = p 435 n, iSlash = son.fUnm(iSlash-i, store, stream[i:]) 436 iSlash += i 437 // if n == 0 { 438 // pHeader.Len -= size 439 // } 440 i += n 441 n, nB = parseByte(stream[i:], ',') 442 i += n 443 if nB != 1 { 444 if nB == 0 && ']' == stream[i] { 445 i++ 446 break 447 } 448 panic(lxterrs.New(ErrStream(stream[i:]))) 449 } 450 } 451 452 *bases = (*uint8s)[:len(*uint8s):len(*uint8s)] 453 if cap(*uint8s)-len(*uint8s) > 16*size { 454 *uint8s = (*uint8s)[len(*uint8s):] 455 tag.SPool.Put(uint8s) 456 } 457 // pH.Data = uintptr(pointerOffset(unsafe.Pointer(pHeader.Data), uintptr(pHeader.Len))) 458 // pH.Cap = pHeader.Cap - pHeader.Len 459 pHeader.Len = pHeader.Len / size 460 pHeader.Cap = pHeader.Cap / size 461 462 return 463 } 464 465 //parseSlice 可以细化一下,每个类型来一个,速度可以加快 466 func parseSlice3(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 467 iSlash = idxSlash 468 i = trimSpace(stream) 469 if stream[i] == ']' { 470 i++ 471 pHeader := (*SliceHeader)(store.obj) 472 pHeader.Data = store.obj 473 return 474 } 475 son := store.tag.ChildList[0] 476 size := son.TypeSize 477 tag := store.tag 478 uint8s := (*[1 << 20]uint8)(tag.SPool2.GetN(4))[: 0 : 4*size] 479 pHeader := (*SliceHeader)(store.obj) 480 bases := (*[]uint8)(store.obj) 481 store.tag = son 482 for n, nB := 0, 0; ; { 483 if len(uint8s)+size > cap(uint8s) { 484 l := cap(uint8s) / size 485 c := l * 2 486 news := (*[1 << 20]uint8)(tag.SPool2.GetN(c))[: 0 : c*size] 487 uint8s = append(news, uint8s...) 
488 } 489 490 l := len(uint8s) 491 uint8s = uint8s[:l+size] 492 493 p := unsafe.Pointer(&uint8s[l]) 494 store.obj = p 495 n, iSlash = son.fUnm(iSlash-i, store, stream[i:]) 496 iSlash += i 497 // if n == 0 { 498 // pHeader.Len -= size 499 // } 500 i += n 501 n, nB = parseByte(stream[i:], ',') 502 i += n 503 if nB != 1 { 504 if nB == 0 && ']' == stream[i] { 505 i++ 506 break 507 } 508 panic(lxterrs.New(ErrStream(stream[i:]))) 509 } 510 } 511 512 *bases = uint8s 513 // pH.Data = uintptr(pointerOffset(unsafe.Pointer(pHeader.Data), uintptr(pHeader.Len))) 514 // pH.Cap = pHeader.Cap - pHeader.Len 515 pHeader.Len = pHeader.Len / size 516 pHeader.Cap = pHeader.Cap / size 517 518 return 519 } 520 521 //parseSlice 可以细化一下,每个类型来一个,速度可以加快 522 func parseSlice(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 523 iSlash = idxSlash 524 i = trimSpace(stream) 525 if stream[i] == ']' { 526 i++ 527 pHeader := (*SliceHeader)(store.obj) 528 pHeader.Data = store.obj 529 return 530 } 531 son := store.tag.ChildList[0] 532 size := son.TypeSize 533 534 // TODO : 从 store.pool 获取 pool 535 // uint8s := store.tag.SPool.Get().(*[]uint8) // cpu %12; , cpu 20% 536 537 // uint8s := store.GetObjs(store.tag.idxSliceObjPool, store.tag.BaseType) 538 // uint8s := store.GetObjs(store.tag.sliceElemGoType) 539 p := son.sliceCache.GetN(4) 540 541 pLen, pCap := 0, 4 542 pHeader := (*SliceHeader)(store.obj) 543 store.tag = son 544 for n, nB := 0, 0; ; { 545 store.obj = pointerOffset(p, uintptr(pLen*size)) 546 n, iSlash = son.fUnm(iSlash-i, store, stream[i:]) 547 pLen++ 548 iSlash += i 549 i += n 550 n, nB = parseByte(stream[i:], ',') 551 i += n 552 if nB != 1 { 553 if nB == 0 && ']' == stream[i] { 554 i++ 555 break 556 } 557 panic(lxterrs.New(ErrStream(stream[i:]))) 558 } 559 if pLen == pCap { 560 l := pLen * size 561 pCap = pCap * 2 562 c := pCap * size 563 pNew := son.sliceCache.GetN(pCap) 564 dst := (*[1 << 30]byte)(pNew)[:l:c] 565 src := (*[1 << 30]byte)(p)[:l:l] 566 copy(dst, src) 567 // dst 
= append(dst, src...) 568 p = pNew 569 } 570 } 571 572 pHeader.Data = p 573 pHeader.Len = pLen 574 pHeader.Cap = pCap 575 return 576 } 577 578 //parseNoscanSlice 解析没有 pointer 的 slice,分配内存是不需要标注指针 579 func parseNoscanSlice(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 580 iSlash = idxSlash 581 i = trimSpace(stream) 582 if stream[i] == ']' { 583 i++ 584 pHeader := (*SliceHeader)(store.obj) 585 pHeader.Data = store.obj 586 return 587 } 588 son := store.tag.ChildList[0] 589 size := son.TypeSize 590 // bytes := store.GetNoscan() 591 p := unsafe.Pointer(gNoscanPool.GetN(4 * size)) 592 pLen := 0 593 pCap := 4 594 595 for n, nB := 0, 0; ; { 596 n, iSlash = son.fUnm(iSlash-i, PoolStore{ 597 obj: pointerOffset(p, uintptr(pLen*size)), 598 tag: son, 599 pointerPool: store.pointerPool, 600 }, stream[i:]) 601 pLen++ 602 iSlash += i 603 i += n 604 605 if stream[i] == ',' && !spaceTable[stream[i+1]] { 606 i++ // 最常命中分支 607 } else if stream[i] == ']' { 608 i++ // 结束时命中分支 609 break 610 } else { 611 // 有空格时命中分支 612 n, nB = parseByte(stream[i:], ',') 613 i += n 614 if nB != 1 { 615 if nB == 0 && ']' == stream[i] { 616 i++ 617 break 618 } 619 panic(lxterrs.New(ErrStream(stream[i:]))) 620 } 621 } 622 if pLen == pCap { 623 l := pLen * size 624 pCap = pCap * 2 625 c := pCap * size 626 bytes := gNoscanPool.GetN2(c)[:0:c] 627 bs := (*[1 << 30]byte)(p)[:l] 628 bytes = append(bytes, bs...) 
629 p = unsafe.Pointer(&bytes[0]) 630 } 631 } 632 sh := (*SliceHeader)(store.obj) 633 sh.Data = p 634 sh.Len = pLen 635 sh.Cap = pLen 636 return 637 } 638 639 //parseNoscanSlice 解析没有 pointer 的 slice,分配内存是不需要标注指针 640 func parseIntSlice(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 641 iSlash = idxSlash 642 i = trimSpace(stream) 643 if stream[i] == ']' { 644 i++ 645 pHeader := (*SliceHeader)(store.obj) 646 pHeader.Data = store.obj 647 return 648 } 649 // ints := store.GetInts() 650 size := store.tag.ChildList[0].TypeSize 651 p := unsafe.Pointer(gNoscanPool.GetN(4 * size)) 652 pLen := 0 653 pCap := 4 654 for n, nB := 0, 0; ; { 655 { 656 num, n := ParseInt(stream[i:]) 657 i += n 658 pInt := pointerOffset(p, uintptr(pLen*size)) 659 *(*int)(pInt) = int(num) 660 pLen++ 661 } 662 n, nB = parseByte(stream[i:], ',') 663 i += n 664 if nB != 1 { 665 if nB == 0 && ']' == stream[i] { 666 i++ 667 break 668 } 669 panic(lxterrs.New(ErrStream(stream[i:]))) 670 } 671 if pLen == pCap { 672 l := pLen * size 673 pCap = pLen * 2 674 c := pCap * size 675 bytes := gNoscanPool.GetN2(c)[:0:c] 676 bs := (*[1 << 30]byte)(p)[:l] 677 bytes = append(bytes, bs...) 
678 p = unsafe.Pointer(&bytes[0]) 679 } 680 } 681 sh := (*SliceHeader)(store.obj) 682 sh.Data = p 683 sh.Len = pLen 684 sh.Cap = pLen 685 // *(*[]int)(store.obj) = []int{6, 7, 8} 686 return 687 } 688 689 //parseNoscanSlice 解析没有 pointer 的 slice,分配内存是不需要标注指针 690 func parseIntSlice1(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 691 iSlash = idxSlash 692 i = trimSpace(stream) 693 if stream[i] == ']' { 694 i++ 695 pHeader := (*SliceHeader)(store.obj) 696 pHeader.Data = store.obj 697 return 698 } 699 // ints := store.GetInts() 700 ints := gIntPool.GetN2(4) 701 for n, nB := 0, 0; ; { 702 { 703 num, n := ParseInt(stream[i:]) 704 i += n 705 ints = append(ints, int(num)) 706 } 707 n, nB = parseByte(stream[i:], ',') 708 i += n 709 if nB != 1 { 710 if nB == 0 && ']' == stream[i] { 711 i++ 712 break 713 } 714 panic(lxterrs.New(ErrStream(stream[i:]))) 715 } 716 if len(ints) == cap(ints) { 717 c := cap(ints) * 2 718 bytes := gIntPool.GetN2(c)[:0:c] 719 bytes = append(bytes, ints...) 720 } 721 } 722 *(*[]int)(store.obj) = ints 723 return 724 } 725 726 //quadwords 4word: 64bit; d:doubleword,双字,32位; w:word,双字节,字,16位; b:byte,字节,8位 727 // tag 实际上已经可以提前知道了,这里无需再取一次,重复了 728 func parseSliceString1(idxSlash int, stream string, store PoolStore, SPoolN int, strsPool *sync.Pool) (i, iSlash int) { 729 iSlash = idxSlash 730 i = trimSpace(stream[i:]) 731 if stream[i] == ']' { 732 i++ 733 pHeader := (*SliceHeader)(store.obj) 734 pHeader.Data = store.obj 735 return 736 } 737 // TODO 使用 IndexByte 先计算slice 的长度,在分配内存 738 // pstrs := strsPool.Get().(*[]string) 739 // strs = strs[:0:cap(strs)] 740 strs := (*[1 << 20]string)(unsafe.Pointer(strPool.GetN(4)))[:0:4] //make([]string, 0, 4) 741 pstrs := (*[]string)(store.obj) 742 *pstrs = strs 743 for n, nB := 0, 0; ; { 744 if len(*pstrs)+1 > cap(*pstrs) { 745 c := len(*pstrs) * 2 746 news := (*[1 << 20]string)(unsafe.Pointer(strPool.GetN(c)))[:0:c] 747 *pstrs = append(news, *pstrs...) 
748 } 749 *pstrs = (*pstrs)[:len(*pstrs)+1] 750 // son := store.tag.ChildList[0] 751 // n, iSlash = son.fUnm(iSlash-i, PoolStore{ 752 // obj: unsafe.Pointer(&(*pstrs)[len(*pstrs)-1]), 753 // tag: son, 754 // pool: store.pool, 755 // }, stream[i:]) 756 // iSlash += i 757 // i += n 758 { 759 // 全部内联 760 i++ 761 n := strings.IndexByte(stream[i:], '"') 762 n += i 763 if iSlash > n { 764 (*pstrs)[len(*pstrs)-1] = stream[i:n] 765 i = n + 1 766 } else { 767 (*pstrs)[len(*pstrs)-1], n, iSlash = parseUnescapeStr(stream[i:], n-i, iSlash) 768 iSlash += i 769 i = i + n 770 } 771 } 772 n, nB = parseByte(stream[i:], ',') 773 i += n 774 if nB != 1 { 775 if nB == 0 && ']' == stream[i] { 776 i++ 777 break 778 } 779 panic(lxterrs.New(ErrStream(stream[i:]))) 780 } 781 } 782 return 783 } 784 785 func parseSliceString(idxSlash int, stream string, store PoolStore) (i, iSlash int) { 786 iSlash = idxSlash 787 i = trimSpace(stream[i:]) 788 if stream[i] == ']' { 789 i++ 790 pHeader := (*SliceHeader)(store.obj) 791 pHeader.Data = store.obj 792 return 793 } 794 // strs := store.GetStrings() 795 strs := gStringPool.GetN2(4)[:1] 796 for n, nB := 0, 0; ; { 797 // strs = GrowStrings(strs, 1) 798 { 799 // 全部内联 800 i++ 801 n := strings.IndexByte(stream[i:], '"') 802 n += i 803 if iSlash > n { 804 strs[len(strs)-1] = stream[i:n] 805 // strs = append(strs, stream[i:n]) 806 i = n + 1 807 } else { 808 strs[len(strs)-1], n, iSlash = parseUnescapeStr(stream[i:], n-i, iSlash) 809 iSlash += i 810 i = i + n 811 } 812 } 813 n, nB = parseByte(stream[i:], ',') 814 i += n 815 if nB != 1 { 816 if nB == 0 && ']' == stream[i] { 817 i++ 818 break 819 } 820 panic(lxterrs.New(ErrStream(stream[i:]))) 821 } 822 if l := len(strs); l == cap(strs) { 823 strsNew := gStringPool.GetN2(l * 2) 824 strs = append(strsNew[:0], strs...) 
825 } 826 strs = strs[:len(strs)+1] 827 } 828 *(*[]string)(store.obj) = strs[:] 829 return 830 } 831 832 // key 后面的单元: Num, str, bool, slice, obj, null 833 func parseInterface(idxSlash int, stream string, p *interface{}) (i, iSlash int) { 834 iSlash = idxSlash 835 // i = trimSpace(stream) 836 switch stream[0] { 837 default: // num 838 var f float64 839 f, i = float64UnmFuncs(stream) 840 *p = f 841 case '{': // obj 842 var m map[string]interface{} // TODO: m 逃逸了 843 m, i, iSlash = parseMapInterface(iSlash-1, stream[1:]) 844 iSlash++ 845 i++ 846 *p = m 847 case '}': 848 case '[': // slice 849 var s []interface{} 850 s, i, iSlash = parseSliceInterface(iSlash-1, stream[1:]) 851 iSlash++ 852 i++ 853 // *p = s 854 ps := islicePool.Get() 855 *ps = s 856 pEface := (*GoEface)(unsafe.Pointer(p)) 857 pEface.Type = isliceEface.Type 858 pEface.Value = unsafe.Pointer(ps) 859 case ']': 860 case 'n': 861 if stream[i+1] != 'u' || stream[i+2] != 'l' || stream[i+3] != 'l' { 862 err := lxterrs.New("should be \"null\", not [%s]", ErrStream(stream)) 863 panic(err) 864 } 865 i = 4 866 case 't': 867 if stream[i+1] != 'r' || stream[i+2] != 'u' || stream[i+3] != 'e' { 868 err := lxterrs.New("should be \"true\", not [%s]", ErrStream(stream)) 869 panic(err) 870 } 871 i = 4 872 *p = true 873 case 'f': 874 if stream[i+1] != 'a' || stream[i+2] != 'l' || stream[i+3] != 's' || stream[i+4] != 'e' { 875 err := lxterrs.New("should be \"false\", not [%s]", ErrStream(stream)) 876 panic(err) 877 } 878 i = 5 879 *p = false 880 case '"': 881 var raw string 882 // 883 raw, i, iSlash = parseStr(stream, iSlash) 884 // *p = bytesString(raw) 885 // return 886 887 // pstr := strPool.Get() // 888 pstr := BatchGet(strPool) // 889 // bytesCopyToString(raw, pstr) 890 *pstr = *(*string)(unsafe.Pointer(&raw)) 891 pEface := (*GoEface)(unsafe.Pointer(p)) 892 pEface.Type = strEface.Type 893 pEface.Value = unsafe.Pointer(pstr) 894 } 895 return 896 } 897 898 func parseEmptyObjSlice(stream string, bLeft, bRight byte) (i 
int) { 899 indexQuote := func(stream string, i int) int { 900 for { 901 iDQuote := strings.IndexByte(stream[i:], '"') 902 if iDQuote < 0 { 903 return math.MaxInt32 904 } 905 i += iDQuote // 指向 '"' 906 if stream[i-1] != '\\' { 907 return i 908 } 909 j := i - 2 910 for ; stream[j] == '\\'; j-- { 911 } 912 if (i-j)%2 == 0 { 913 i++ 914 continue 915 } 916 return i 917 } 918 } 919 i++ 920 nBrace := 0 // " 和 { 921 iBraceL := strings.IndexByte(stream[i:], bLeft) //通过 ’“‘ 的 idx 来确定'{' '}' 是否在字符串中 922 iBraceR := strings.IndexByte(stream[i:], bRight) 923 if iBraceL < 0 { 924 iBraceL = math.MaxInt32 // 保证 +i 后不会溢出 925 } 926 if iBraceR < 0 { 927 iBraceR = math.MaxInt32 928 } 929 iBraceL, iBraceR = iBraceL+i, iBraceR+i 930 931 iDQuoteL := indexQuote(stream, i) 932 iDQuoteR := indexQuote(stream, iDQuoteL+1) 933 934 for { 935 // 1. 以 iBraceR 为边界 936 if iBraceR < iBraceL { 937 if iDQuoteR < iBraceR { 938 // ']'在右区间 939 iDQuoteL = indexQuote(stream, iDQuoteR+1) 940 iDQuoteR = indexQuote(stream, iDQuoteL+1) 941 continue 942 } else if iBraceR < iDQuoteL { 943 // ']'在左区间 944 if nBrace == 0 { 945 i = iBraceR + 1 946 return 947 } 948 nBrace-- 949 iBraceR++ 950 iBraceRNew := strings.IndexByte(stream[iBraceR:], bRight) 951 if iBraceRNew < 0 { 952 iBraceRNew = math.MaxInt32 953 } 954 iBraceR += iBraceRNew 955 continue 956 } else { 957 // ']'在中间区间 958 iBraceR = strings.IndexByte(stream[iDQuoteR:], bRight) 959 if iBraceR < 0 { 960 iBraceR = math.MaxInt32 961 } 962 iBraceR += iDQuoteR 963 continue 964 } 965 } else { 966 // iBraceL < iBraceR 967 // 2. 
以 iBraceR 为边界 968 969 if iDQuoteR < iBraceL { 970 // ']'在右区间 971 iDQuoteL = indexQuote(stream, iDQuoteR+1) 972 iDQuoteR = indexQuote(stream, iDQuoteL+1) 973 continue 974 } else if iBraceL < iDQuoteL { 975 // ']'在左区间 976 nBrace++ 977 iBraceL++ 978 iBraceLNew := strings.IndexByte(stream[iBraceL:], bLeft) //通过 ’“‘ 的 idx 来确定'{' '}' 是否在字符串中 979 if iBraceLNew < 0 { 980 iBraceLNew = math.MaxInt32 // 保证 +i 后不会溢出 981 } 982 iBraceL += iBraceLNew 983 continue 984 } else { 985 // ']'在中间区间 986 iBraceL = strings.IndexByte(stream[iDQuoteR:], bLeft) 987 if iBraceL < 0 { 988 iBraceL = math.MaxInt32 989 } 990 iBraceL += iDQuoteR 991 continue 992 } 993 } 994 } 995 return 996 } 997 998 //TODO 通过 IndexByte 的方式快速跳过; 在下一层处理,这里 设为 nil 999 // 如果是 其他: 找 ',' 1000 // 如果是obj: 1. 找 ’}‘; 2. 找'{'; 3. 如果 2 比 1 小则循环 1 2 1001 // 如果是 slice : 1. 找 ’]‘; 2. 找'['; 3. 如果 2 比 1 小则循环 1 2 1002 // var iface interface{} 1003 // n, iSlash = parseInterface(iSlash-i, stream[i:], &iface) 1004 // iSlash += i 1005 func parseEmpty(stream string) (i int) { 1006 switch stream[0] { 1007 default: // num 1008 for ; i < len(stream); i++ { 1009 c := stream[i] 1010 if c == ']' || c == '}' || c == ',' { 1011 break 1012 } 1013 } 1014 case '{': // obj 1015 n := parseEmptyObjSlice(stream[i:], '{', '}') 1016 i += n 1017 case '[': // slice 1018 n := parseEmptyObjSlice(stream[i:], '[', ']') 1019 i += n 1020 case ']', '}': 1021 case 'n': 1022 if stream[i+1] != 'u' || stream[i+2] != 'l' || stream[i+3] != 'l' { 1023 err := lxterrs.New("should be \"null\", not [%s]", ErrStream(stream)) 1024 panic(err) 1025 } 1026 i = 4 1027 case 't': 1028 if stream[i+1] != 'r' || stream[i+2] != 'u' || stream[i+3] != 'e' { 1029 err := lxterrs.New("should be \"true\", not [%s]", ErrStream(stream)) 1030 panic(err) 1031 } 1032 i = 4 1033 case 'f': 1034 if stream[i+1] != 'a' || stream[i+2] != 'l' || stream[i+3] != 's' || stream[i+4] != 'e' { 1035 err := lxterrs.New("should be \"false\", not [%s]", ErrStream(stream)) 1036 panic(err) 1037 } 1038 i = 5 1039 
case '"': 1040 i++ 1041 for { 1042 iDQuote := strings.IndexByte(stream[i:], '"') 1043 i += iDQuote // 指向 '"' 1044 1045 // 处理转义字符串 1046 if stream[i-1] == '\\' { 1047 j := i - 2 1048 for ; stream[j] == '\\'; j-- { 1049 } 1050 if (i-j)%2 == 0 { 1051 i++ 1052 continue 1053 } 1054 } 1055 i++ 1056 return 1057 } 1058 } 1059 return 1060 } 1061 1062 //解析 obj: {}, 或 [] 1063 func parseRoot(stream string, store PoolStore) (err error) { 1064 idxSlash := strings.IndexByte(stream[1:], '\\') 1065 if idxSlash < 0 { 1066 idxSlash = math.MaxInt 1067 } 1068 if stream[0] == '{' { 1069 parseObj(idxSlash, stream[1:], store) 1070 return 1071 } 1072 return 1073 } 1074 1075 func parseStr(stream string, nextSlashIdx int) (raw string, i, nextSlashIdxOut int) { 1076 i = strings.IndexByte(stream[1:], '"') 1077 if i >= 0 && nextSlashIdx > i+1 { 1078 i++ 1079 raw = stream[1:i] 1080 i++ 1081 nextSlashIdxOut = nextSlashIdx 1082 return 1083 } 1084 i++ 1085 return parseUnescapeStr(stream, i, nextSlashIdx) 1086 } 1087 1088 func parseUnescapeStr(stream string, nextQuotesIdx, nextSlashIdxIn int) (raw string, i, nextSlashIdx int) { 1089 nextSlashIdx = nextSlashIdxIn 1090 if nextSlashIdx < 0 { 1091 nextSlashIdx = strings.IndexByte(stream[1:], '\\') 1092 if nextSlashIdx < 0 { 1093 nextSlashIdx = math.MaxInt 1094 i += nextQuotesIdx 1095 raw = stream[1:i] 1096 i++ 1097 return 1098 } 1099 1100 nextSlashIdx++ 1101 // 处理 '\"' 1102 for { 1103 i += nextQuotesIdx // 指向 '"' 1104 if stream[i-1] == '\\' { 1105 j := i - 2 1106 for ; stream[j] == '\\'; j-- { 1107 } 1108 if (i-j)%2 == 0 { 1109 i++ 1110 nextQuotesIdx = strings.IndexByte(stream[i:], '"') 1111 continue 1112 } 1113 } 1114 i++ 1115 break 1116 } 1117 } 1118 if nextQuotesIdx < 0 { 1119 panic(string(stream[i:])) 1120 } 1121 if nextSlashIdx > nextQuotesIdx { 1122 i = nextQuotesIdx + 1 1123 raw = stream[:i] 1124 return 1125 } 1126 lastIdx := 0 1127 var bs []byte 1128 for { 1129 i = nextSlashIdx 1130 word, wordSize := unescapeStr(stream[i:]) 1131 if len(bs) == 0 { 
1132 bs = make([]byte, 0, nextQuotesIdx) 1133 bs = append(bs[:0], stream[1:i]...) //新建 string 避免修改员 stream 1134 } else if lastIdx < i { 1135 bs = append(bs, stream[lastIdx:i]...) 1136 } 1137 bs = append(bs, word...) 1138 i += wordSize 1139 lastIdx = i 1140 if word[0] == '"' { 1141 nextQuotesIdx = strings.IndexByte(stream[i:], '"') 1142 if nextQuotesIdx < 0 { 1143 panic(string(stream[i:])) 1144 } 1145 nextQuotesIdx += i 1146 } 1147 1148 nextSlashIdx = strings.IndexByte(stream[i:], '\\') 1149 if nextSlashIdx < 0 { 1150 nextSlashIdx = math.MaxInt 1151 break 1152 } 1153 nextSlashIdx += i 1154 if nextSlashIdx > nextQuotesIdx { 1155 break 1156 } 1157 } 1158 bs = append(bs, stream[lastIdx:nextQuotesIdx]...) 1159 return bytesString(bs), nextQuotesIdx + 1, nextSlashIdx 1160 } 1161 1162 // unescape unescapes a string 1163 //“\\”、“\"”、“\/”、“\b”、“\f”、“\n”、“\r”、“\t” 1164 // \u后面跟随4位16进制数字: "\uD83D\uDE02" 1165 func unescapeStr(raw string) (word []byte, size int) { 1166 // i==0是 '\\', 所以从1开始 1167 switch raw[1] { 1168 case '\\': 1169 word, size = []byte{'\\'}, 2 1170 case '/': 1171 word, size = []byte{'/'}, 2 1172 case 'b': 1173 word, size = []byte{'\b'}, 2 1174 case 'f': 1175 word, size = []byte{'\f'}, 2 1176 case 'n': 1177 word, size = []byte{'\n'}, 2 1178 case 'r': 1179 word, size = []byte{'\r'}, 2 1180 case 't': 1181 word, size = []byte{'\t'}, 2 1182 case '"': 1183 word, size = []byte{'"'}, 2 1184 case 'u': 1185 //\uD83D 1186 if len(raw) < 6 { 1187 panic(errors.New("incorrect format: \\" + string(raw))) 1188 } 1189 last := raw[:6] 1190 r0 := unescapeToRune(last[2:]) 1191 size, raw = 6, raw[6:] 1192 if utf16.IsSurrogate(r0) { // 如果utf-6还有后续(不完整) 1193 if len(raw) < 6 || raw[0] != '\\' || raw[1] != 'u' { 1194 l := 6 1195 if l > len(raw) { 1196 l = len(raw) 1197 } 1198 panic(errors.New("incorrect format: \\" + string(last) + string(raw[:l]))) 1199 } 1200 r1 := unescapeToRune(raw[:6]) 1201 // we expect it to be correct so just consume it 1202 r0 = utf16.DecodeRune(r0, r1) 1203 size 
+= 6 1204 } 1205 // provide enough space to encode the largest utf8 possible 1206 word = make([]byte, 4) 1207 n := utf8.EncodeRune(word, r0) 1208 word = word[:n] 1209 default: 1210 panic(errors.New("incorrect format: " + ErrStream(raw))) 1211 } 1212 return 1213 } 1214 1215 // runeit returns the rune from the the \uXXXX 1216 func unescapeToRune(raw string) rune { 1217 n, err := strconv.ParseUint(string(raw), 16, 64) 1218 if err != nil { 1219 panic(errors.New("err:" + err.Error() + ",ncorrect format: " + string(raw))) 1220 } 1221 return rune(n) 1222 }