github.com/goccy/go-json@v0.10.3-0.20240509105655-5e2ae3f23c1d/internal/decoder/stream.go (about)

     1  package decoder
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"io"
     7  	"strconv"
     8  	"unsafe"
     9  
    10  	"github.com/goccy/go-json/internal/errors"
    11  )
    12  
    13  const (
    14  	initBufSize = 512
    15  )
    16  
// Stream is a buffered, incremental view over an io.Reader used while
// decoding. Input is read into buf in chunks; a nul byte in buf marks the
// point where more data has to be requested from r.
type Stream struct {
	// buf holds the bytes read so far that have not been discarded by reset.
	buf []byte
	// bufSize is the size used when buf is reallocated; readBuf doubles it
	// each time the buffer has to grow.
	bufSize int64
	// length is the index in buf just past the last byte read from r.
	length int64
	// r is the underlying source of input.
	r io.Reader
	// offset counts the bytes dropped from the front of buf by reset.
	offset int64
	// cursor is the index in buf of the byte currently being examined.
	cursor int64
	// filledBuffer records that the last Read used all the space it was
	// given, so the next readBuf call grows the buffer first.
	filledBuffer bool
	// allRead is set once r has returned io.EOF; read then stops calling r.
	allRead bool
	// UseNumber makes Token return numbers as json.Number instead of float64.
	UseNumber bool
	// DisallowUnknownFields is not consulted in this file.
	// NOTE(review): presumably read by the struct decoder — confirm.
	DisallowUnknownFields bool
	// Option is initialised to an empty value by NewStream.
	// NOTE(review): its consumers are outside this file — confirm usage.
	Option *Option
}
    30  
    31  func NewStream(r io.Reader) *Stream {
    32  	return &Stream{
    33  		r:       r,
    34  		bufSize: initBufSize,
    35  		buf:     make([]byte, initBufSize),
    36  		Option:  &Option{},
    37  	}
    38  }
    39  
    40  func (s *Stream) TotalOffset() int64 {
    41  	return s.totalOffset()
    42  }
    43  
    44  func (s *Stream) Buffered() io.Reader {
    45  	buflen := int64(len(s.buf))
    46  	for i := s.cursor; i < buflen; i++ {
    47  		if s.buf[i] == nul {
    48  			return bytes.NewReader(s.buf[s.cursor:i])
    49  		}
    50  	}
    51  	return bytes.NewReader(s.buf[s.cursor:])
    52  }
    53  
    54  func (s *Stream) PrepareForDecode() error {
    55  	for {
    56  		switch s.char() {
    57  		case ' ', '\t', '\r', '\n':
    58  			s.cursor++
    59  			continue
    60  		case ',', ':':
    61  			s.cursor++
    62  			return nil
    63  		case nul:
    64  			if s.read() {
    65  				continue
    66  			}
    67  			return io.EOF
    68  		}
    69  		break
    70  	}
    71  	return nil
    72  }
    73  
    74  func (s *Stream) totalOffset() int64 {
    75  	return s.offset + s.cursor
    76  }
    77  
    78  func (s *Stream) char() byte {
    79  	return s.buf[s.cursor]
    80  }
    81  
    82  func (s *Stream) equalChar(c byte) bool {
    83  	cur := s.buf[s.cursor]
    84  	if cur == nul {
    85  		s.read()
    86  		cur = s.buf[s.cursor]
    87  	}
    88  	return cur == c
    89  }
    90  
    91  func (s *Stream) stat() ([]byte, int64, unsafe.Pointer) {
    92  	return s.buf, s.cursor, (*sliceHeader)(unsafe.Pointer(&s.buf)).data
    93  }
    94  
    95  func (s *Stream) bufptr() unsafe.Pointer {
    96  	return (*sliceHeader)(unsafe.Pointer(&s.buf)).data
    97  }
    98  
    99  func (s *Stream) statForRetry() ([]byte, int64, unsafe.Pointer) {
   100  	s.cursor-- // for retry ( because caller progress cursor position in each loop )
   101  	return s.buf, s.cursor, (*sliceHeader)(unsafe.Pointer(&s.buf)).data
   102  }
   103  
   104  func (s *Stream) Reset() {
   105  	s.reset()
   106  	s.bufSize = int64(len(s.buf))
   107  }
   108  
   109  func (s *Stream) More() bool {
   110  	for {
   111  		switch s.char() {
   112  		case ' ', '\n', '\r', '\t':
   113  			s.cursor++
   114  			continue
   115  		case '}', ']':
   116  			return false
   117  		case nul:
   118  			if s.read() {
   119  				continue
   120  			}
   121  			return false
   122  		}
   123  		break
   124  	}
   125  	return true
   126  }
   127  
   128  func (s *Stream) Token() (interface{}, error) {
   129  	for {
   130  		c := s.char()
   131  		switch c {
   132  		case ' ', '\n', '\r', '\t':
   133  			s.cursor++
   134  		case '{', '[', ']', '}':
   135  			s.cursor++
   136  			return json.Delim(c), nil
   137  		case ',', ':':
   138  			s.cursor++
   139  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   140  			bytes := floatBytes(s)
   141  			str := *(*string)(unsafe.Pointer(&bytes))
   142  			if s.UseNumber {
   143  				return json.Number(str), nil
   144  			}
   145  			f64, err := strconv.ParseFloat(str, 64)
   146  			if err != nil {
   147  				return nil, err
   148  			}
   149  			return f64, nil
   150  		case '"':
   151  			bytes, err := stringBytes(s)
   152  			if err != nil {
   153  				return nil, err
   154  			}
   155  			return string(bytes), nil
   156  		case 't':
   157  			if err := trueBytes(s); err != nil {
   158  				return nil, err
   159  			}
   160  			return true, nil
   161  		case 'f':
   162  			if err := falseBytes(s); err != nil {
   163  				return nil, err
   164  			}
   165  			return false, nil
   166  		case 'n':
   167  			if err := nullBytes(s); err != nil {
   168  				return nil, err
   169  			}
   170  			return nil, nil
   171  		case nul:
   172  			if s.read() {
   173  				continue
   174  			}
   175  			goto END
   176  		default:
   177  			return nil, errors.ErrInvalidCharacter(s.char(), "token", s.totalOffset())
   178  		}
   179  	}
   180  END:
   181  	return nil, io.EOF
   182  }
   183  
   184  func (s *Stream) reset() {
   185  	s.offset += s.cursor
   186  	s.buf = s.buf[s.cursor:]
   187  	s.length -= s.cursor
   188  	s.cursor = 0
   189  }
   190  
   191  func (s *Stream) readBuf() []byte {
   192  	if s.filledBuffer {
   193  		s.bufSize *= 2
   194  		remainBuf := s.buf
   195  		s.buf = make([]byte, s.bufSize)
   196  		copy(s.buf, remainBuf)
   197  	}
   198  	remainLen := s.length - s.cursor
   199  	remainNotNulCharNum := int64(0)
   200  	for i := int64(0); i < remainLen; i++ {
   201  		if s.buf[s.cursor+i] == nul {
   202  			break
   203  		}
   204  		remainNotNulCharNum++
   205  	}
   206  	s.length = s.cursor + remainNotNulCharNum
   207  	return s.buf[s.cursor+remainNotNulCharNum:]
   208  }
   209  
   210  func (s *Stream) read() bool {
   211  	if s.allRead {
   212  		return false
   213  	}
   214  	buf := s.readBuf()
   215  	last := len(buf) - 1
   216  	buf[last] = nul
   217  	n, err := s.r.Read(buf[:last])
   218  	s.length += int64(n)
   219  	if n == last {
   220  		s.filledBuffer = true
   221  	} else {
   222  		s.filledBuffer = false
   223  	}
   224  	if err == io.EOF {
   225  		s.allRead = true
   226  	} else if err != nil {
   227  		return false
   228  	}
   229  	return true
   230  }
   231  
   232  func (s *Stream) skipWhiteSpace() byte {
   233  	p := s.bufptr()
   234  LOOP:
   235  	c := char(p, s.cursor)
   236  	switch c {
   237  	case ' ', '\n', '\t', '\r':
   238  		s.cursor++
   239  		goto LOOP
   240  	case nul:
   241  		if s.read() {
   242  			p = s.bufptr()
   243  			goto LOOP
   244  		}
   245  	}
   246  	return c
   247  }
   248  
   249  func (s *Stream) skipObject(depth int64) error {
   250  	braceCount := 1
   251  	_, cursor, p := s.stat()
   252  	for {
   253  		switch char(p, cursor) {
   254  		case '{':
   255  			braceCount++
   256  			depth++
   257  			if depth > maxDecodeNestingDepth {
   258  				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
   259  			}
   260  		case '}':
   261  			braceCount--
   262  			depth--
   263  			if braceCount == 0 {
   264  				s.cursor = cursor + 1
   265  				return nil
   266  			}
   267  		case '[':
   268  			depth++
   269  			if depth > maxDecodeNestingDepth {
   270  				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
   271  			}
   272  		case ']':
   273  			depth--
   274  		case '"':
   275  			for {
   276  				cursor++
   277  				switch char(p, cursor) {
   278  				case '\\':
   279  					cursor++
   280  					if char(p, cursor) == nul {
   281  						s.cursor = cursor
   282  						if s.read() {
   283  							_, cursor, p = s.stat()
   284  							continue
   285  						}
   286  						return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
   287  					}
   288  				case '"':
   289  					goto SWITCH_OUT
   290  				case nul:
   291  					s.cursor = cursor
   292  					if s.read() {
   293  						_, cursor, p = s.statForRetry()
   294  						continue
   295  					}
   296  					return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
   297  				}
   298  			}
   299  		case nul:
   300  			s.cursor = cursor
   301  			if s.read() {
   302  				_, cursor, p = s.stat()
   303  				continue
   304  			}
   305  			return errors.ErrUnexpectedEndOfJSON("object of object", cursor)
   306  		}
   307  	SWITCH_OUT:
   308  		cursor++
   309  	}
   310  }
   311  
   312  func (s *Stream) skipArray(depth int64) error {
   313  	bracketCount := 1
   314  	_, cursor, p := s.stat()
   315  	for {
   316  		switch char(p, cursor) {
   317  		case '[':
   318  			bracketCount++
   319  			depth++
   320  			if depth > maxDecodeNestingDepth {
   321  				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
   322  			}
   323  		case ']':
   324  			bracketCount--
   325  			depth--
   326  			if bracketCount == 0 {
   327  				s.cursor = cursor + 1
   328  				return nil
   329  			}
   330  		case '{':
   331  			depth++
   332  			if depth > maxDecodeNestingDepth {
   333  				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
   334  			}
   335  		case '}':
   336  			depth--
   337  		case '"':
   338  			for {
   339  				cursor++
   340  				switch char(p, cursor) {
   341  				case '\\':
   342  					cursor++
   343  					if char(p, cursor) == nul {
   344  						s.cursor = cursor
   345  						if s.read() {
   346  							_, cursor, p = s.stat()
   347  							continue
   348  						}
   349  						return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
   350  					}
   351  				case '"':
   352  					goto SWITCH_OUT
   353  				case nul:
   354  					s.cursor = cursor
   355  					if s.read() {
   356  						_, cursor, p = s.statForRetry()
   357  						continue
   358  					}
   359  					return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
   360  				}
   361  			}
   362  		case nul:
   363  			s.cursor = cursor
   364  			if s.read() {
   365  				_, cursor, p = s.stat()
   366  				continue
   367  			}
   368  			return errors.ErrUnexpectedEndOfJSON("array of object", cursor)
   369  		}
   370  	SWITCH_OUT:
   371  		cursor++
   372  	}
   373  }
   374  
   375  func (s *Stream) skipValue(depth int64) error {
   376  	_, cursor, p := s.stat()
   377  	for {
   378  		switch char(p, cursor) {
   379  		case ' ', '\n', '\t', '\r':
   380  			cursor++
   381  			continue
   382  		case nul:
   383  			s.cursor = cursor
   384  			if s.read() {
   385  				_, cursor, p = s.stat()
   386  				continue
   387  			}
   388  			return errors.ErrUnexpectedEndOfJSON("value of object", s.totalOffset())
   389  		case '{':
   390  			s.cursor = cursor + 1
   391  			return s.skipObject(depth + 1)
   392  		case '[':
   393  			s.cursor = cursor + 1
   394  			return s.skipArray(depth + 1)
   395  		case '"':
   396  			for {
   397  				cursor++
   398  				switch char(p, cursor) {
   399  				case '\\':
   400  					cursor++
   401  					if char(p, cursor) == nul {
   402  						s.cursor = cursor
   403  						if s.read() {
   404  							_, cursor, p = s.stat()
   405  							continue
   406  						}
   407  						return errors.ErrUnexpectedEndOfJSON("value of string", s.totalOffset())
   408  					}
   409  				case '"':
   410  					s.cursor = cursor + 1
   411  					return nil
   412  				case nul:
   413  					s.cursor = cursor
   414  					if s.read() {
   415  						_, cursor, p = s.statForRetry()
   416  						continue
   417  					}
   418  					return errors.ErrUnexpectedEndOfJSON("value of string", s.totalOffset())
   419  				}
   420  			}
   421  		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   422  			for {
   423  				cursor++
   424  				c := char(p, cursor)
   425  				if floatTable[c] {
   426  					continue
   427  				} else if c == nul {
   428  					if s.read() {
   429  						_, cursor, p = s.stat()
   430  						continue
   431  					}
   432  				}
   433  				s.cursor = cursor
   434  				return nil
   435  			}
   436  		case 't':
   437  			s.cursor = cursor
   438  			if err := trueBytes(s); err != nil {
   439  				return err
   440  			}
   441  			return nil
   442  		case 'f':
   443  			s.cursor = cursor
   444  			if err := falseBytes(s); err != nil {
   445  				return err
   446  			}
   447  			return nil
   448  		case 'n':
   449  			s.cursor = cursor
   450  			if err := nullBytes(s); err != nil {
   451  				return err
   452  			}
   453  			return nil
   454  		}
   455  		cursor++
   456  	}
   457  }
   458  
   459  func nullBytes(s *Stream) error {
   460  	// current cursor's character is 'n'
   461  	s.cursor++
   462  	if s.char() != 'u' {
   463  		if err := retryReadNull(s); err != nil {
   464  			return err
   465  		}
   466  	}
   467  	s.cursor++
   468  	if s.char() != 'l' {
   469  		if err := retryReadNull(s); err != nil {
   470  			return err
   471  		}
   472  	}
   473  	s.cursor++
   474  	if s.char() != 'l' {
   475  		if err := retryReadNull(s); err != nil {
   476  			return err
   477  		}
   478  	}
   479  	s.cursor++
   480  	return nil
   481  }
   482  
   483  func retryReadNull(s *Stream) error {
   484  	if s.char() == nul && s.read() {
   485  		return nil
   486  	}
   487  	return errors.ErrInvalidCharacter(s.char(), "null", s.totalOffset())
   488  }
   489  
   490  func trueBytes(s *Stream) error {
   491  	// current cursor's character is 't'
   492  	s.cursor++
   493  	if s.char() != 'r' {
   494  		if err := retryReadTrue(s); err != nil {
   495  			return err
   496  		}
   497  	}
   498  	s.cursor++
   499  	if s.char() != 'u' {
   500  		if err := retryReadTrue(s); err != nil {
   501  			return err
   502  		}
   503  	}
   504  	s.cursor++
   505  	if s.char() != 'e' {
   506  		if err := retryReadTrue(s); err != nil {
   507  			return err
   508  		}
   509  	}
   510  	s.cursor++
   511  	return nil
   512  }
   513  
   514  func retryReadTrue(s *Stream) error {
   515  	if s.char() == nul && s.read() {
   516  		return nil
   517  	}
   518  	return errors.ErrInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
   519  }
   520  
   521  func falseBytes(s *Stream) error {
   522  	// current cursor's character is 'f'
   523  	s.cursor++
   524  	if s.char() != 'a' {
   525  		if err := retryReadFalse(s); err != nil {
   526  			return err
   527  		}
   528  	}
   529  	s.cursor++
   530  	if s.char() != 'l' {
   531  		if err := retryReadFalse(s); err != nil {
   532  			return err
   533  		}
   534  	}
   535  	s.cursor++
   536  	if s.char() != 's' {
   537  		if err := retryReadFalse(s); err != nil {
   538  			return err
   539  		}
   540  	}
   541  	s.cursor++
   542  	if s.char() != 'e' {
   543  		if err := retryReadFalse(s); err != nil {
   544  			return err
   545  		}
   546  	}
   547  	s.cursor++
   548  	return nil
   549  }
   550  
   551  func retryReadFalse(s *Stream) error {
   552  	if s.char() == nul && s.read() {
   553  		return nil
   554  	}
   555  	return errors.ErrInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
   556  }