github.com/patricebensoussan/go/codec@v1.2.99/reader.go (about) 1 // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved. 2 // Use of this source code is governed by a MIT license found in the LICENSE file. 3 4 package codec 5 6 import "io" 7 8 // decReader abstracts the reading source, allowing implementations that can 9 // read from an io.Reader or directly off a byte slice with zero-copying. 10 type decReader interface { 11 // readx will use the implementation scratch buffer if possible i.e. n < len(scratchbuf), OR 12 // just return a view of the []byte being decoded from. 13 readx(n uint) []byte 14 readb([]byte) 15 16 readn1() byte 17 readn2() [2]byte 18 // readn3 will read 3 bytes into the top-most elements of a 4-byte array 19 readn3() [4]byte 20 readn4() [4]byte 21 readn8() [8]byte 22 // readn1eof() (v uint8, eof bool) 23 24 // // read up to 8 bytes at a time 25 // readn(num uint8) (v [8]byte) 26 27 numread() uint // number of bytes read 28 29 // skip any whitespace characters, and return the first non-matching byte 30 skipWhitespace() (token byte) 31 32 // jsonReadNum will include last read byte in first element of slice, 33 // and continue numeric characters until it sees a non-numeric char 34 // or EOF. If it sees a non-numeric character, it will unread that. 35 jsonReadNum() []byte 36 37 // jsonReadAsisChars will read json plain characters (anything but " or \) 38 // and return a slice terminated by a non-json asis character. 39 jsonReadAsisChars() []byte 40 41 // skip will skip any byte that matches, and return the first non-matching byte 42 // skip(accept *bitset256) (token byte) 43 44 // readTo will read any byte that matches, stopping once no-longer matching. 45 // readTo(accept *bitset256) (out []byte) 46 47 // readUntil will read, only stopping once it matches the 'stop' byte (which it excludes). 
48 readUntil(stop byte) (out []byte) 49 } 50 51 // ------------------------------------------------ 52 53 type unreadByteStatus uint8 54 55 // unreadByteStatus goes from 56 // undefined (when initialized) -- (read) --> canUnread -- (unread) --> canRead ... 57 const ( 58 unreadByteUndefined unreadByteStatus = iota 59 unreadByteCanRead 60 unreadByteCanUnread 61 ) 62 63 // -------------------- 64 65 type ioDecReaderCommon struct { 66 r io.Reader // the reader passed in 67 68 n uint // num read 69 70 l byte // last byte 71 ls unreadByteStatus // last byte status 72 73 b [6]byte // tiny buffer for reading single bytes 74 75 blist *bytesFreelist 76 77 bufr []byte // buffer for readTo/readUntil 78 } 79 80 func (z *ioDecReaderCommon) reset(r io.Reader, blist *bytesFreelist) { 81 z.blist = blist 82 z.r = r 83 z.ls = unreadByteUndefined 84 z.l, z.n = 0, 0 85 z.bufr = z.blist.check(z.bufr, 256) 86 } 87 88 func (z *ioDecReaderCommon) numread() uint { 89 return z.n 90 } 91 92 // ------------------------------------------ 93 94 // ioDecReader is a decReader that reads off an io.Reader. 95 // 96 // It also has a fallback implementation of ByteScanner if needed. 
97 type ioDecReader struct { 98 ioDecReaderCommon 99 100 br io.ByteScanner 101 102 x [64 + 48]byte // for: get struct field name, swallow valueTypeBytes, etc 103 } 104 105 func (z *ioDecReader) reset(r io.Reader, blist *bytesFreelist) { 106 z.ioDecReaderCommon.reset(r, blist) 107 108 z.br, _ = r.(io.ByteScanner) 109 } 110 111 func (z *ioDecReader) Read(p []byte) (n int, err error) { 112 if len(p) == 0 { 113 return 114 } 115 var firstByte bool 116 if z.ls == unreadByteCanRead { 117 z.ls = unreadByteCanUnread 118 p[0] = z.l 119 if len(p) == 1 { 120 n = 1 121 return 122 } 123 firstByte = true 124 p = p[1:] 125 } 126 n, err = z.r.Read(p) 127 if n > 0 { 128 if err == io.EOF && n == len(p) { 129 err = nil // read was successful, so postpone EOF (till next time) 130 } 131 z.l = p[n-1] 132 z.ls = unreadByteCanUnread 133 } 134 if firstByte { 135 n++ 136 } 137 return 138 } 139 140 func (z *ioDecReader) ReadByte() (c byte, err error) { 141 if z.br != nil { 142 c, err = z.br.ReadByte() 143 if err == nil { 144 z.l = c 145 z.ls = unreadByteCanUnread 146 } 147 return 148 } 149 150 n, err := z.Read(z.b[:1]) 151 if n == 1 { 152 c = z.b[0] 153 if err == io.EOF { 154 err = nil // read was successful, so postpone EOF (till next time) 155 } 156 } 157 return 158 } 159 160 func (z *ioDecReader) UnreadByte() (err error) { 161 if z.br != nil { 162 err = z.br.UnreadByte() 163 if err == nil { 164 z.ls = unreadByteCanRead 165 } 166 return 167 } 168 169 switch z.ls { 170 case unreadByteCanUnread: 171 z.ls = unreadByteCanRead 172 case unreadByteCanRead: 173 err = errDecUnreadByteLastByteNotRead 174 case unreadByteUndefined: 175 err = errDecUnreadByteNothingToRead 176 default: 177 err = errDecUnreadByteUnknown 178 } 179 return 180 } 181 182 func (z *ioDecReader) readn2() (bs [2]byte) { 183 z.readb(bs[:]) 184 return 185 } 186 187 func (z *ioDecReader) readn3() (bs [4]byte) { 188 z.readb(bs[1:]) 189 return 190 } 191 192 func (z *ioDecReader) readn4() (bs [4]byte) { 193 z.readb(bs[:]) 194 return 
195 } 196 197 func (z *ioDecReader) readn8() (bs [8]byte) { 198 z.readb(bs[:]) 199 return 200 } 201 202 func (z *ioDecReader) readx(n uint) (bs []byte) { 203 if n == 0 { 204 return 205 } 206 if n < uint(len(z.x)) { 207 bs = z.x[:n] 208 } else { 209 bs = make([]byte, n) 210 } 211 _, err := readFull(z.r, bs) 212 halt.onerror(err) 213 z.n += uint(len(bs)) 214 return 215 } 216 217 func (z *ioDecReader) readb(bs []byte) { 218 if len(bs) == 0 { 219 return 220 } 221 _, err := readFull(z.r, bs) 222 halt.onerror(err) 223 z.n += uint(len(bs)) 224 } 225 226 func (z *ioDecReader) readn1() (b uint8) { 227 b, err := z.ReadByte() 228 halt.onerror(err) 229 z.n++ 230 return 231 } 232 233 func (z *ioDecReader) readn1eof() (b uint8, eof bool) { 234 b, err := z.ReadByte() 235 if err == nil { 236 z.n++ 237 } else if err == io.EOF { 238 eof = true 239 } else { 240 halt.onerror(err) 241 } 242 return 243 } 244 245 func (z *ioDecReader) jsonReadNum() (bs []byte) { 246 z.unreadn1() 247 z.bufr = z.bufr[:0] 248 LOOP: 249 i, eof := z.readn1eof() 250 if eof { 251 return z.bufr 252 } 253 if isNumberChar(i) { 254 z.bufr = append(z.bufr, i) 255 goto LOOP 256 } 257 z.unreadn1() 258 return z.bufr 259 } 260 261 func (z *ioDecReader) jsonReadAsisChars() (bs []byte) { 262 z.bufr = z.bufr[:0] 263 LOOP: 264 i := z.readn1() 265 z.bufr = append(z.bufr, i) 266 if i == '"' || i == '\\' { 267 return z.bufr 268 } 269 goto LOOP 270 } 271 272 func (z *ioDecReader) skipWhitespace() (token byte) { 273 LOOP: 274 token = z.readn1() 275 if isWhitespaceChar(token) { 276 goto LOOP 277 } 278 return 279 } 280 281 func (z *ioDecReader) readUntil(stop byte) []byte { 282 z.bufr = z.bufr[:0] 283 LOOP: 284 token := z.readn1() 285 z.bufr = append(z.bufr, token) 286 if token == stop { 287 return z.bufr[:len(z.bufr)-1] 288 } 289 goto LOOP 290 } 291 292 func (z *ioDecReader) unreadn1() { 293 err := z.UnreadByte() 294 halt.onerror(err) 295 z.n-- 296 } 297 298 // ------------------------------------ 299 300 type bufioDecReader 
struct { 301 ioDecReaderCommon 302 303 c uint // cursor 304 buf []byte 305 } 306 307 func (z *bufioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) { 308 z.ioDecReaderCommon.reset(r, blist) 309 z.c = 0 310 if cap(z.buf) < bufsize { 311 z.buf = blist.get(bufsize) 312 } else { 313 z.buf = z.buf[:0] 314 } 315 } 316 317 func (z *bufioDecReader) readb(p []byte) { 318 var n = uint(copy(p, z.buf[z.c:])) 319 z.n += n 320 z.c += n 321 if len(p) != int(n) { 322 z.readbFill(p, n, true, false) 323 } 324 } 325 326 func readbFillHandleErr(err error, must, eof bool) (isEOF bool) { 327 if err == io.EOF { 328 isEOF = true 329 } 330 if must && !(eof && isEOF) { 331 halt.onerror(err) 332 } 333 return 334 } 335 336 func (z *bufioDecReader) readbFill(p0 []byte, n uint, must, eof bool) (isEOF bool, err error) { 337 // at this point, there's nothing in z.buf to read (z.buf is fully consumed) 338 var p []byte 339 if p0 != nil { 340 p = p0[n:] 341 } 342 var n2 uint 343 if len(p) > cap(z.buf) { 344 n2, err = readFull(z.r, p) 345 if err != nil { 346 isEOF = readbFillHandleErr(err, must, eof) 347 return 348 } 349 n += n2 350 z.n += n2 351 // always keep last byte in z.buf 352 z.buf = z.buf[:1] 353 z.buf[0] = p[len(p)-1] 354 z.c = 1 355 return 356 } 357 // z.c is now 0, and len(p) <= cap(z.buf) 358 var n1 int 359 LOOP: 360 // for len(p) > 0 && z.err == nil { 361 z.buf = z.buf[0:cap(z.buf)] 362 n1, err = z.r.Read(z.buf) 363 n2 = uint(n1) 364 if n2 == 0 && err != nil { 365 isEOF = readbFillHandleErr(err, must, eof) 366 return 367 } 368 err = nil 369 z.buf = z.buf[:n2] 370 z.c = 0 371 if len(p) > 0 { 372 n2 = uint(copy(p, z.buf)) 373 z.c = n2 374 n += n2 375 z.n += n2 376 p = p[n2:] 377 if len(p) > 0 { 378 goto LOOP 379 } 380 if z.c == 0 { 381 z.buf = z.buf[:1] 382 z.buf[0] = p[len(p)-1] 383 z.c = 1 384 } 385 } 386 return 387 } 388 389 func (z *bufioDecReader) readn1() (b byte) { 390 if z.c >= uint(len(z.buf)) { 391 z.readbFill(nil, 0, true, false) 392 } 393 b = z.buf[z.c] 394 
z.c++ 395 z.n++ 396 return 397 } 398 399 func (z *bufioDecReader) readn1eof() (b byte, eof bool) { 400 if z.c >= uint(len(z.buf)) { 401 eof, _ = z.readbFill(nil, 0, true, true) 402 if eof { 403 return 404 } 405 } 406 b = z.buf[z.c] 407 z.c++ 408 z.n++ 409 return 410 } 411 412 func (z *bufioDecReader) unreadn1() { 413 if z.c == 0 { 414 halt.onerror(errDecUnreadByteNothingToRead) 415 } 416 z.c-- 417 z.n-- 418 } 419 420 func (z *bufioDecReader) readn2() (bs [2]byte) { 421 z.readb(bs[:]) 422 return 423 } 424 425 func (z *bufioDecReader) readn3() (bs [4]byte) { 426 z.readb(bs[1:]) 427 return 428 } 429 430 func (z *bufioDecReader) readn4() (bs [4]byte) { 431 z.readb(bs[:]) 432 return 433 } 434 435 func (z *bufioDecReader) readn8() (bs [8]byte) { 436 z.readb(bs[:]) 437 return 438 } 439 440 func (z *bufioDecReader) readx(n uint) (bs []byte) { 441 if n == 0 { 442 // return 443 } else if z.c+n <= uint(len(z.buf)) { 444 bs = z.buf[z.c : z.c+n] 445 z.n += n 446 z.c += n 447 } else { 448 bs = make([]byte, n) 449 // n no longer used - can reuse 450 n = uint(copy(bs, z.buf[z.c:])) 451 z.n += n 452 z.c += n 453 z.readbFill(bs, n, true, false) 454 } 455 return 456 } 457 458 func (z *bufioDecReader) jsonReadNum() (bs []byte) { 459 z.unreadn1() 460 z.bufr = z.bufr[:0] 461 LOOP: 462 i, eof := z.readn1eof() 463 if eof { 464 return z.bufr 465 } 466 if isNumberChar(i) { 467 z.bufr = append(z.bufr, i) 468 goto LOOP 469 } 470 z.unreadn1() 471 return z.bufr 472 } 473 474 func (z *bufioDecReader) jsonReadAsisChars() (bs []byte) { 475 z.bufr = z.bufr[:0] 476 LOOP: 477 i := z.readn1() 478 z.bufr = append(z.bufr, i) 479 if i == '"' || i == '\\' { 480 return z.bufr 481 } 482 goto LOOP 483 } 484 485 func (z *bufioDecReader) skipWhitespace() (token byte) { 486 i := z.c 487 LOOP: 488 if i < uint(len(z.buf)) { 489 // inline z.skipLoopFn(i) and refactor, so cost is within inline budget 490 token = z.buf[i] 491 i++ 492 if isWhitespaceChar(token) { 493 goto LOOP 494 } 495 z.n += i - 2 - z.c 496 z.c = i 
497 return 498 } 499 return z.skipFillWhitespace() 500 } 501 502 func (z *bufioDecReader) skipFillWhitespace() (token byte) { 503 z.n += uint(len(z.buf)) - z.c 504 var i, n2 int 505 var err error 506 for { 507 z.c = 0 508 z.buf = z.buf[0:cap(z.buf)] 509 n2, err = z.r.Read(z.buf) 510 if n2 == 0 { 511 halt.onerror(err) 512 } 513 z.buf = z.buf[:n2] 514 for i, token = range z.buf { 515 if !isWhitespaceChar(token) { 516 z.n += (uint(i) - z.c) - 1 517 z.loopFn(uint(i + 1)) 518 return 519 } 520 } 521 z.n += uint(n2) 522 } 523 } 524 525 func (z *bufioDecReader) loopFn(i uint) { 526 z.c = i 527 } 528 529 func (z *bufioDecReader) readUntil(stop byte) (out []byte) { 530 i := z.c 531 LOOP: 532 if i < uint(len(z.buf)) { 533 if z.buf[i] == stop { 534 z.n += (i - z.c) - 1 535 i++ 536 out = z.buf[z.c:i] 537 z.c = i 538 goto FINISH 539 } 540 i++ 541 goto LOOP 542 } 543 out = z.readUntilFill(stop) 544 FINISH: 545 return out[:len(out)-1] 546 } 547 548 func (z *bufioDecReader) readUntilFill(stop byte) []byte { 549 z.bufr = z.bufr[:0] 550 z.n += uint(len(z.buf)) - z.c 551 z.bufr = append(z.bufr, z.buf[z.c:]...) 552 for { 553 z.c = 0 554 z.buf = z.buf[0:cap(z.buf)] 555 n1, err := z.r.Read(z.buf) 556 if n1 == 0 { 557 halt.onerror(err) 558 } 559 n2 := uint(n1) 560 z.buf = z.buf[:n2] 561 for i, token := range z.buf { 562 if token == stop { 563 z.n += (uint(i) - z.c) - 1 564 z.bufr = append(z.bufr, z.buf[z.c:i+1]...) 565 z.loopFn(uint(i + 1)) 566 return z.bufr 567 } 568 } 569 z.bufr = append(z.bufr, z.buf...) 570 z.n += n2 571 } 572 } 573 574 // ------------------------------------ 575 576 // bytesDecReader is a decReader that reads off a byte slice with zero copying 577 // 578 // Note: we do not try to convert index'ing out of bounds to an io.EOF. 579 // instead, we let it bubble up to the exported Encode/Decode method 580 // and recover it as an io.EOF. 581 // 582 // see panicValToErr(...) function in helper.go. 
type bytesDecReader struct {
	b []byte // data
	c uint   // cursor
}

// reset starts reading from the beginning of in. The slice is capped at its
// length, so that reslicing can never reach past the data handed in.
func (z *bytesDecReader) reset(in []byte) {
	size := len(in)
	z.c = 0
	z.b = in[:size:size] // reslicing must not go past capacity
}

// numread reports how many bytes have been consumed, i.e. the cursor position.
func (z *bytesDecReader) numread() uint {
	return z.c
}

// Note: slicing from a non-constant start position is more expensive,
// as more computation is required to decipher the pointer start position.
// However, we do it only once, and it's better than reslicing both z.b and return value.

// readx returns a view of the next n bytes of the data (no copy is made)
// and moves the cursor past them. The cursor is only moved once the slice
// expression has succeeded.
func (z *bytesDecReader) readx(n uint) (bs []byte) {
	end := z.c + n
	bs = z.b[z.c:end]
	z.c = end
	return bs
}

// readb copies the next len(bs) bytes of the data into bs.
func (z *bytesDecReader) readb(bs []byte) {
	src := z.readx(uint(len(bs)))
	copy(bs, src)
}

// MARKER: do not use this - as it calls into memmove (as the size of data to move is unknown)
// func (z *bytesDecReader) readnn(bs []byte, n uint) {
// 	x := z.c
// 	copy(bs, z.b[x:x+n])
// 	z.c += n
// }

// func (z *bytesDecReader) readn(num uint8) (bs [8]byte) {
// 	x := z.c + uint(num)
// 	copy(bs[:], z.b[z.c:x]) // slice z.b completely, so we get bounds error if past
// 	z.c = x
// 	return
// }

// func (z *bytesDecReader) readn1() uint8 {
// 	z.c++
// 	return z.b[z.c-1]
// }

// readn1 returns the byte under the cursor and moves the cursor past it.
// The cursor is only moved once the index expression has succeeded.
func (z *bytesDecReader) readn1() (v uint8) {
	at := z.c
	v = z.b[at]
	z.c = at + 1
	return v
}

// MARKER: for readn{2,3,4,8}, ensure you slice z.b completely so we get bounds error if past end.
638 639 func (z *bytesDecReader) readn2() (bs [2]byte) { 640 // copy(bs[:], z.b[z.c:z.c+2]) 641 bs[1] = z.b[z.c+1] 642 bs[0] = z.b[z.c] 643 z.c += 2 644 return 645 } 646 647 func (z *bytesDecReader) readn3() (bs [4]byte) { 648 // copy(bs[1:], z.b[z.c:z.c+3]) 649 bs = okBytes3(z.b[z.c : z.c+3]) 650 z.c += 3 651 return 652 } 653 654 func (z *bytesDecReader) readn4() (bs [4]byte) { 655 // copy(bs[:], z.b[z.c:z.c+4]) 656 bs = okBytes4(z.b[z.c : z.c+4]) 657 z.c += 4 658 return 659 } 660 661 func (z *bytesDecReader) readn8() (bs [8]byte) { 662 // copy(bs[:], z.b[z.c:z.c+8]) 663 bs = okBytes8(z.b[z.c : z.c+8]) 664 z.c += 8 665 return 666 } 667 668 func (z *bytesDecReader) jsonReadNum() []byte { 669 z.c-- 670 i := z.c 671 LOOP: 672 if i < uint(len(z.b)) && isNumberChar(z.b[i]) { 673 i++ 674 goto LOOP 675 } 676 z.c, i = i, z.c 677 return z.b[i:z.c] 678 } 679 680 func (z *bytesDecReader) jsonReadAsisChars() []byte { 681 i := z.c 682 LOOP: 683 token := z.b[i] 684 i++ 685 if token == '"' || token == '\\' { 686 z.c, i = i, z.c 687 return z.b[i:z.c] 688 } 689 goto LOOP 690 } 691 692 func (z *bytesDecReader) skipWhitespace() (token byte) { 693 i := z.c 694 LOOP: 695 token = z.b[i] 696 if isWhitespaceChar(token) { 697 i++ 698 goto LOOP 699 } 700 z.c = i + 1 701 return 702 } 703 704 func (z *bytesDecReader) readUntil(stop byte) (out []byte) { 705 i := z.c 706 LOOP: 707 if z.b[i] == stop { 708 out = z.b[z.c:i] 709 z.c = i + 1 710 return 711 } 712 i++ 713 goto LOOP 714 } 715 716 // -------------- 717 718 type decRd struct { 719 mtr bool // is maptype a known type? 720 str bool // is slicetype a known type? 721 722 be bool // is binary encoding 723 js bool // is json handle 724 jsms bool // is json handle, and MapKeyAsString 725 cbor bool // is cbor handle 726 727 bytes bool // is bytes reader 728 bufio bool // is this a bufioDecReader? 
729 730 rb bytesDecReader 731 ri *ioDecReader 732 bi *bufioDecReader 733 734 decReader 735 } 736 737 // From out benchmarking, we see the following in terms of performance: 738 // 739 // - interface calls 740 // - branch that can inline what it calls 741 // 742 // the if/else-if/else block is expensive to inline. 743 // Each node of this construct costs a lot and dominates the budget. 744 // Best to only do an if fast-path else block (so fast-path is inlined). 745 // This is irrespective of inlineExtraCallCost set in $GOROOT/src/cmd/compile/internal/gc/inl.go 746 // 747 // In decRd methods below, we delegate all IO functions into their own methods. 748 // This allows for the inlining of the common path when z.bytes=true. 749 // Go 1.12+ supports inlining methods with up to 1 inlined function (or 2 if no other constructs). 750 // 751 // However, up through Go 1.13, decRd's readXXX, skip and unreadXXX methods are not inlined. 752 // Consequently, there is no benefit to do the xxxIO methods for decRd at this time. 753 // Instead, we have a if/else-if/else block so that IO calls do not have to jump through 754 // a second unnecessary function call. 755 // 756 // If golang inlining gets better and bytesDecReader methods can be inlined, 757 // then we can revert to using these 2 functions so the bytesDecReader 758 // methods are inlined and the IO paths call out to a function. 759 // 760 // decRd is designed to embed a decReader, and then re-implement some of the decReader 761 // methods using a conditional branch. We only override the ones that have a bytes version 762 // that is small enough to be inlined. We use ./run.sh -z to check. 763 // Right now, only numread and readn1 can be inlined. 
764 765 func (z *decRd) numread() uint { 766 if z.bytes { 767 return z.rb.numread() 768 } else if z.bufio { 769 return z.bi.numread() 770 } else { 771 return z.ri.numread() 772 } 773 } 774 775 func (z *decRd) readn1() (v uint8) { 776 if z.bytes { 777 // MARKER: manually inline, else this function is not inlined. 778 // Keep in sync with bytesDecReader.readn1 779 // return z.rb.readn1() 780 v = z.rb.b[z.rb.c] 781 z.rb.c++ 782 } else { 783 v = z.readn1IO() 784 } 785 return 786 } 787 func (z *decRd) readn1IO() uint8 { 788 if z.bufio { 789 return z.bi.readn1() 790 } 791 return z.ri.readn1() 792 } 793 794 type devNullReader struct{} 795 796 func (devNullReader) Read(p []byte) (int, error) { return 0, io.EOF } 797 func (devNullReader) Close() error { return nil } 798 799 func readFull(r io.Reader, bs []byte) (n uint, err error) { 800 var nn int 801 for n < uint(len(bs)) && err == nil { 802 nn, err = r.Read(bs[n:]) 803 if nn > 0 { 804 if err == io.EOF { 805 // leave EOF for next time 806 err = nil 807 } 808 n += uint(nn) 809 } 810 } 811 // do not do this below - it serves no purpose 812 // if n != len(bs) && err == io.EOF { err = io.ErrUnexpectedEOF } 813 return 814 } 815 816 var _ decReader = (*decRd)(nil)