github.com/ugorji/go/codec@v1.2.13-0.20240307214044-07c54c229a5a/reader.go (about)

     1  // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
     2  // Use of this source code is governed by a MIT license found in the LICENSE file.
     3  
     4  package codec
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"io"
    10  	"strings"
    11  )
    12  
    13  // decReader abstracts the reading source, allowing implementations that can
    14  // read from an io.Reader or directly off a byte slice with zero-copying.
    15  type decReader interface {
    16  	// readx will return a view of the []byte if decoding from a []byte, OR
    17  	// read into the implementation scratch buffer if possible i.e. n < len(scratchbuf), OR
    18  	// create a new []byte and read into that
    19  	readx(n uint) []byte
    20  
    21  	readb([]byte)
    22  
    23  	readn1() byte
    24  	readn2() [2]byte
    25  	readn3() [3]byte
    26  	readn4() [4]byte
    27  	readn8() [8]byte
    28  	// readn1eof() (v uint8, eof bool)
    29  
    30  	// // read up to 8 bytes at a time
    31  	// readn(num uint8) (v [8]byte)
    32  
    33  	numread() uint // number of bytes read
    34  
    35  	// skip any whitespace characters, and return the first non-matching byte
    36  	skipWhitespace() (token byte)
    37  
    38  	// jsonReadNum will include last read byte in first element of slice,
    39  	// and continue numeric characters until it sees a non-numeric char
    40  	// or EOF. If it sees a non-numeric character, it will unread that.
    41  	jsonReadNum() []byte
    42  
    43  	// jsonReadAsisChars will read json plain characters (anything but " or \)
    44  	// and return a slice terminated by a non-json asis character.
    45  	jsonReadAsisChars() []byte
    46  
    47  	// skip will skip any byte that matches, and return the first non-matching byte
    48  	// skip(accept *bitset256) (token byte)
    49  
    50  	// readTo will read any byte that matches, stopping once no-longer matching.
    51  	// readTo(accept *bitset256) (out []byte)
    52  
    53  	// readUntil will read, only stopping once it matches the 'stop' byte (which it excludes).
    54  	readUntil(stop byte) (out []byte)
    55  }
    56  
    57  // ------------------------------------------------
    58  
    59  type unreadByteStatus uint8
    60  
    61  // unreadByteStatus goes from
    62  // undefined (when initialized) -- (read) --> canUnread -- (unread) --> canRead ...
    63  const (
    64  	unreadByteUndefined unreadByteStatus = iota
    65  	unreadByteCanRead
    66  	unreadByteCanUnread
    67  )
    68  
    69  // const defBufReaderSize = 4096
    70  
    71  // --------------------
    72  
    73  // ioReaderByteScanner contains the io.Reader and io.ByteScanner interfaces
    74  type ioReaderByteScanner interface {
    75  	io.Reader
    76  	io.ByteScanner
    77  	// ReadByte() (byte, error)
    78  	// UnreadByte() error
    79  	// Read(p []byte) (n int, err error)
    80  }
    81  
    82  // ioReaderByteScannerT does a simple wrapper of a io.ByteScanner
    83  // over a io.Reader
    84  type ioReaderByteScannerT struct {
    85  	r io.Reader
    86  
    87  	l  byte             // last byte
    88  	ls unreadByteStatus // last byte status
    89  
    90  	_ [2]byte // padding
    91  	b [4]byte // tiny buffer for reading single bytes
    92  }
    93  
    94  func (z *ioReaderByteScannerT) ReadByte() (c byte, err error) {
    95  	if z.ls == unreadByteCanRead {
    96  		z.ls = unreadByteCanUnread
    97  		c = z.l
    98  	} else {
    99  		_, err = z.Read(z.b[:1])
   100  		c = z.b[0]
   101  	}
   102  	return
   103  }
   104  
   105  func (z *ioReaderByteScannerT) UnreadByte() (err error) {
   106  	switch z.ls {
   107  	case unreadByteCanUnread:
   108  		z.ls = unreadByteCanRead
   109  	case unreadByteCanRead:
   110  		err = errDecUnreadByteLastByteNotRead
   111  	case unreadByteUndefined:
   112  		err = errDecUnreadByteNothingToRead
   113  	default:
   114  		err = errDecUnreadByteUnknown
   115  	}
   116  	return
   117  }
   118  
   119  func (z *ioReaderByteScannerT) Read(p []byte) (n int, err error) {
   120  	if len(p) == 0 {
   121  		return
   122  	}
   123  	var firstByte bool
   124  	if z.ls == unreadByteCanRead {
   125  		z.ls = unreadByteCanUnread
   126  		p[0] = z.l
   127  		if len(p) == 1 {
   128  			n = 1
   129  			return
   130  		}
   131  		firstByte = true
   132  		p = p[1:]
   133  	}
   134  	n, err = z.r.Read(p)
   135  	if n > 0 {
   136  		if err == io.EOF && n == len(p) {
   137  			err = nil // read was successful, so postpone EOF (till next time)
   138  		}
   139  		z.l = p[n-1]
   140  		z.ls = unreadByteCanUnread
   141  	}
   142  	if firstByte {
   143  		n++
   144  	}
   145  	return
   146  }
   147  
   148  func (z *ioReaderByteScannerT) reset(r io.Reader) {
   149  	z.r = r
   150  	z.ls = unreadByteUndefined
   151  	z.l = 0
   152  }
   153  
   154  // ioDecReader is a decReader that reads off an io.Reader.
   155  type ioDecReader struct {
   156  	rr ioReaderByteScannerT // the reader passed in, wrapped into a reader+bytescanner
   157  
   158  	n uint // num read
   159  
   160  	blist *bytesFreelist
   161  
   162  	bufr []byte              // buffer for readTo/readUntil
   163  	br   ioReaderByteScanner // main reader used for Read|ReadByte|UnreadByte
   164  	bb   *bufio.Reader       // created internally, and reused on reset if needed
   165  
   166  	x [64 + 40]byte // for: get struct field name, swallow valueTypeBytes, etc
   167  }
   168  
   169  func (z *ioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) {
   170  	z.blist = blist
   171  	z.n = 0
   172  	z.bufr = z.blist.check(z.bufr, 256)
   173  	z.br = nil
   174  
   175  	var ok bool
   176  
   177  	if bufsize <= 0 {
   178  		z.br, ok = r.(ioReaderByteScanner)
   179  		if !ok {
   180  			z.rr.reset(r)
   181  			z.br = &z.rr
   182  		}
   183  		return
   184  	}
   185  
   186  	// bufsize > 0 ...
   187  
   188  	// if bytes.[Buffer|Reader], no value in adding extra buffer
   189  	// if bufio.Reader, no value in extra buffer unless size changes
   190  	switch bb := r.(type) {
   191  	case *strings.Reader:
   192  		z.br = bb
   193  	case *bytes.Buffer:
   194  		z.br = bb
   195  	case *bytes.Reader:
   196  		z.br = bb
   197  	case *bufio.Reader:
   198  		if bb.Size() == bufsize {
   199  			z.br = bb
   200  		}
   201  	}
   202  
   203  	if z.br == nil {
   204  		if z.bb != nil && z.bb.Size() == bufsize {
   205  			z.bb.Reset(r)
   206  		} else {
   207  			z.bb = bufio.NewReaderSize(r, bufsize)
   208  		}
   209  		z.br = z.bb
   210  	}
   211  }
   212  
   213  func (z *ioDecReader) numread() uint {
   214  	return z.n
   215  }
   216  
   217  func (z *ioDecReader) readn1() (b uint8) {
   218  	b, err := z.br.ReadByte()
   219  	halt.onerror(err)
   220  	z.n++
   221  	return
   222  }
   223  
   224  func (z *ioDecReader) readn2() (bs [2]byte) {
   225  	z.readb(bs[:])
   226  	return
   227  }
   228  
   229  func (z *ioDecReader) readn3() (bs [3]byte) {
   230  	z.readb(bs[:])
   231  	return
   232  }
   233  
   234  func (z *ioDecReader) readn4() (bs [4]byte) {
   235  	z.readb(bs[:])
   236  	return
   237  }
   238  
   239  func (z *ioDecReader) readn8() (bs [8]byte) {
   240  	z.readb(bs[:])
   241  	return
   242  }
   243  
   244  func (z *ioDecReader) readx(n uint) (bs []byte) {
   245  	if n == 0 {
   246  		return zeroByteSlice
   247  	}
   248  	if n < uint(len(z.x)) {
   249  		bs = z.x[:n]
   250  	} else {
   251  		bs = make([]byte, n)
   252  	}
   253  	nn, err := readFull(z.br, bs)
   254  	z.n += nn
   255  	halt.onerror(err)
   256  	return
   257  }
   258  
   259  func (z *ioDecReader) readb(bs []byte) {
   260  	if len(bs) == 0 {
   261  		return
   262  	}
   263  	nn, err := readFull(z.br, bs)
   264  	z.n += nn
   265  	halt.onerror(err)
   266  }
   267  
   268  // func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
   269  // 	b, err := z.br.ReadByte()
   270  // 	if err == nil {
   271  // 		z.n++
   272  // 	} else if err == io.EOF {
   273  // 		eof = true
   274  // 	} else {
   275  // 		halt.onerror(err)
   276  // 	}
   277  // 	return
   278  // }
   279  
   280  func (z *ioDecReader) jsonReadNum() (bs []byte) {
   281  	z.unreadn1()
   282  	z.bufr = z.bufr[:0]
   283  LOOP:
   284  	// i, eof := z.readn1eof()
   285  	i, err := z.br.ReadByte()
   286  	if err == io.EOF {
   287  		return z.bufr
   288  	}
   289  	if err != nil {
   290  		halt.onerror(err)
   291  	}
   292  	z.n++
   293  	if isNumberChar(i) {
   294  		z.bufr = append(z.bufr, i)
   295  		goto LOOP
   296  	}
   297  	z.unreadn1()
   298  	return z.bufr
   299  }
   300  
   301  func (z *ioDecReader) jsonReadAsisChars() (bs []byte) {
   302  	z.bufr = z.bufr[:0]
   303  LOOP:
   304  	i := z.readn1()
   305  	z.bufr = append(z.bufr, i)
   306  	if i == '"' || i == '\\' {
   307  		return z.bufr
   308  	}
   309  	goto LOOP
   310  }
   311  
   312  func (z *ioDecReader) skipWhitespace() (token byte) {
   313  LOOP:
   314  	token = z.readn1()
   315  	if isWhitespaceChar(token) {
   316  		goto LOOP
   317  	}
   318  	return
   319  }
   320  
   321  // func (z *ioDecReader) readUntil(stop byte) []byte {
   322  // 	z.bufr = z.bufr[:0]
   323  // LOOP:
   324  // 	token := z.readn1()
   325  // 	z.bufr = append(z.bufr, token)
   326  // 	if token == stop {
   327  // 		return z.bufr[:len(z.bufr)-1]
   328  // 	}
   329  // 	goto LOOP
   330  // }
   331  
   332  func (z *ioDecReader) readUntil(stop byte) []byte {
   333  	z.bufr = z.bufr[:0]
   334  LOOP:
   335  	token := z.readn1()
   336  	if token == stop {
   337  		return z.bufr
   338  	}
   339  	z.bufr = append(z.bufr, token)
   340  	goto LOOP
   341  }
   342  
   343  func (z *ioDecReader) unreadn1() {
   344  	err := z.br.UnreadByte()
   345  	halt.onerror(err)
   346  	z.n--
   347  }
   348  
   349  // ------------------------------------
   350  
   351  // bytesDecReader is a decReader that reads off a byte slice with zero copying
   352  //
   353  // Note: we do not try to convert index'ing out of bounds to an io error.
   354  // instead, we let it bubble up to the exported Encode/Decode method
   355  // and recover it as an io error.
   356  //
   357  // Every function here MUST defensively check bounds either explicitly
   358  // or via a bounds check.
   359  //
   360  // see panicValToErr(...) function in helper.go.
   361  type bytesDecReader struct {
   362  	b []byte // data
   363  	c uint   // cursor
   364  }
   365  
   366  func (z *bytesDecReader) reset(in []byte) {
   367  	z.b = in[:len(in):len(in)] // reslicing must not go past capacity
   368  	z.c = 0
   369  }
   370  
   371  func (z *bytesDecReader) numread() uint {
   372  	return z.c
   373  }
   374  
   375  // Note: slicing from a non-constant start position is more expensive,
   376  // as more computation is required to decipher the pointer start position.
   377  // However, we do it only once, and it's better than reslicing both z.b and return value.
   378  
   379  func (z *bytesDecReader) readx(n uint) (bs []byte) {
   380  	// x := z.c + n
   381  	// bs = z.b[z.c:x]
   382  	// z.c = x
   383  	bs = z.b[z.c : z.c+n]
   384  	z.c += n
   385  	return
   386  }
   387  
   388  func (z *bytesDecReader) readb(bs []byte) {
   389  	copy(bs, z.readx(uint(len(bs))))
   390  }
   391  
   392  // MARKER: do not use this - as it calls into memmove (as the size of data to move is unknown)
   393  // func (z *bytesDecReader) readnn(bs []byte, n uint) {
   394  // 	x := z.c
   395  // 	copy(bs, z.b[x:x+n])
   396  // 	z.c += n
   397  // }
   398  
   399  // func (z *bytesDecReader) readn(num uint8) (bs [8]byte) {
   400  // 	x := z.c + uint(num)
   401  // 	copy(bs[:], z.b[z.c:x]) // slice z.b completely, so we get bounds error if past
   402  // 	z.c = x
   403  // 	return
   404  // }
   405  
   406  // func (z *bytesDecReader) readn1() uint8 {
   407  // 	z.c++
   408  // 	return z.b[z.c-1]
   409  // }
   410  
   411  // MARKER: readn{1,2,3,4,8} should throw an out of bounds error if past length.
   412  // MARKER: readn1: explicitly ensure bounds check is done
   413  // MARKER: readn{2,3,4,8}: ensure you slice z.b completely so we get bounds error if past end.
   414  
   415  func (z *bytesDecReader) readn1() (v uint8) {
   416  	v = z.b[z.c]
   417  	z.c++
   418  	return
   419  }
   420  
   421  func (z *bytesDecReader) readn2() (bs [2]byte) {
   422  	// copy(bs[:], z.b[z.c:z.c+2])
   423  	// bs[1] = z.b[z.c+1]
   424  	// bs[0] = z.b[z.c]
   425  	bs = okBytes2(z.b[z.c : z.c+2])
   426  	z.c += 2
   427  	return
   428  }
   429  
   430  func (z *bytesDecReader) readn3() (bs [3]byte) {
   431  	// copy(bs[1:], z.b[z.c:z.c+3])
   432  	bs = okBytes3(z.b[z.c : z.c+3])
   433  	z.c += 3
   434  	return
   435  }
   436  
   437  func (z *bytesDecReader) readn4() (bs [4]byte) {
   438  	// copy(bs[:], z.b[z.c:z.c+4])
   439  	bs = okBytes4(z.b[z.c : z.c+4])
   440  	z.c += 4
   441  	return
   442  }
   443  
   444  func (z *bytesDecReader) readn8() (bs [8]byte) {
   445  	// copy(bs[:], z.b[z.c:z.c+8])
   446  	bs = okBytes8(z.b[z.c : z.c+8])
   447  	z.c += 8
   448  	return
   449  }
   450  
   451  func (z *bytesDecReader) jsonReadNum() []byte {
   452  	z.c-- // unread
   453  	i := z.c
   454  LOOP:
   455  	// gracefully handle end of slice, as end of stream is meaningful here
   456  	if i < uint(len(z.b)) && isNumberChar(z.b[i]) {
   457  		i++
   458  		goto LOOP
   459  	}
   460  	z.c, i = i, z.c
   461  	// MARKER: 20230103: byteSliceOf here prevents inlining of jsonReadNum
   462  	// return byteSliceOf(z.b, i, z.c)
   463  	return z.b[i:z.c]
   464  }
   465  
   466  func (z *bytesDecReader) jsonReadAsisChars() []byte {
   467  	i := z.c
   468  LOOP:
   469  	token := z.b[i]
   470  	i++
   471  	if token == '"' || token == '\\' {
   472  		z.c, i = i, z.c
   473  		return byteSliceOf(z.b, i, z.c)
   474  		// return z.b[i:z.c]
   475  	}
   476  	goto LOOP
   477  }
   478  
   479  func (z *bytesDecReader) skipWhitespace() (token byte) {
   480  	i := z.c
   481  LOOP:
   482  	token = z.b[i]
   483  	if isWhitespaceChar(token) {
   484  		i++
   485  		goto LOOP
   486  	}
   487  	z.c = i + 1
   488  	return
   489  }
   490  
   491  func (z *bytesDecReader) readUntil(stop byte) (out []byte) {
   492  	i := z.c
   493  LOOP:
   494  	if z.b[i] == stop {
   495  		out = byteSliceOf(z.b, z.c, i)
   496  		// out = z.b[z.c:i]
   497  		z.c = i + 1
   498  		return
   499  	}
   500  	i++
   501  	goto LOOP
   502  }
   503  
   504  // --------------
   505  
   506  type decRd struct {
   507  	rb bytesDecReader
   508  	ri *ioDecReader
   509  
   510  	decReader
   511  
   512  	bytes bool // is bytes reader
   513  
   514  	// MARKER: these fields below should belong directly in Encoder.
   515  	// we pack them here for space efficiency and cache-line optimization.
   516  
   517  	mtr bool // is maptype a known type?
   518  	str bool // is slicetype a known type?
   519  
   520  	be   bool // is binary encoding
   521  	js   bool // is json handle
   522  	jsms bool // is json handle, and MapKeyAsString
   523  	cbor bool // is cbor handle
   524  
   525  	cbreak bool // is a check breaker
   526  
   527  }
   528  
   529  // From our benchmarking, we see the following impact performance:
   530  //
   531  // - functions that are too big to inline
   532  // - interface calls (as no inlining can occur)
   533  //
   534  // decRd is designed to embed a decReader, and then re-implement some of the decReader
   535  // methods using a conditional branch.
   536  //
   537  // We only override the ones where the bytes version is inlined AND the wrapper method
   538  // (containing the bytes version alongside a conditional branch) is also inlined.
   539  //
   540  // We use ./run.sh -z to check.
   541  //
   542  // Right now, only numread and "carefully crafted" readn1 can be inlined.
   543  
   544  func (z *decRd) numread() uint {
   545  	if z.bytes {
   546  		return z.rb.numread()
   547  	}
   548  	return z.ri.numread()
   549  }
   550  
   551  func (z *decRd) readn1() (v uint8) {
   552  	if z.bytes {
   553  		// return z.rb.readn1()
   554  		// MARKER: calling z.rb.readn1() prevents decRd.readn1 from being inlined.
   555  		// copy code, to manually inline and explicitly return here.
   556  		// Keep in sync with bytesDecReader.readn1
   557  		v = z.rb.b[z.rb.c]
   558  		z.rb.c++
   559  		return
   560  	}
   561  	return z.ri.readn1()
   562  }
   563  
   564  // func (z *decRd) readn4() [4]byte {
   565  // 	if z.bytes {
   566  // 		return z.rb.readn4()
   567  // 	}
   568  // 	return z.ri.readn4()
   569  // }
   570  
   571  // func (z *decRd) readn3() [3]byte {
   572  // 	if z.bytes {
   573  // 		return z.rb.readn3()
   574  // 	}
   575  // 	return z.ri.readn3()
   576  // }
   577  
   578  // func (z *decRd) skipWhitespace() byte {
   579  // 	if z.bytes {
   580  // 		return z.rb.skipWhitespace()
   581  // 	}
   582  // 	return z.ri.skipWhitespace()
   583  // }
   584  
   585  type devNullReader struct{}
   586  
   587  func (devNullReader) Read(p []byte) (int, error) { return 0, io.EOF }
   588  func (devNullReader) Close() error               { return nil }
   589  
   590  func readFull(r io.Reader, bs []byte) (n uint, err error) {
   591  	var nn int
   592  	for n < uint(len(bs)) && err == nil {
   593  		nn, err = r.Read(bs[n:])
   594  		if nn > 0 {
   595  			if err == io.EOF {
   596  				// leave EOF for next time
   597  				err = nil
   598  			}
   599  			n += uint(nn)
   600  		}
   601  	}
   602  	// do not do this below - it serves no purpose
   603  	// if n != len(bs) && err == io.EOF { err = io.ErrUnexpectedEOF }
   604  	return
   605  }
   606  
   607  var _ decReader = (*decRd)(nil)