github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/internal/decoder/stream.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package decoder
    18  
    19  import (
    20  	"bytes"
    21  	"io"
    22  	"sync"
    23  
    24  	"github.com/goshafaq/sonic/internal/native"
    25  	"github.com/goshafaq/sonic/internal/native/types"
    26  	"github.com/goshafaq/sonic/option"
    27  )
    28  
    29  var (
    30  	minLeftBufferShift uint = 1
    31  )
    32  
    33  // StreamDecoder is the decoder context object for streaming input.
    34  type StreamDecoder struct {
    35  	r       io.Reader
    36  	buf     []byte
    37  	scanp   int
    38  	scanned int64
    39  	err     error
    40  	Decoder
    41  }
    42  
    43  var bufPool = sync.Pool{
    44  	New: func() interface{} {
    45  		return make([]byte, 0, option.DefaultDecoderBufferSize)
    46  	},
    47  }
    48  
    49  // NewStreamDecoder adapts to encoding/json.NewDecoder API.
    50  //
    51  // NewStreamDecoder returns a new decoder that reads from r.
    52  func NewStreamDecoder(r io.Reader) *StreamDecoder {
    53  	return &StreamDecoder{r: r}
    54  }
    55  
    56  // Decode decodes input stream into val with corresponding data.
    57  // Redundantly bytes may be read and left in its buffer, and can be used at next call.
    58  // Either io error from underlying io.Reader (except io.EOF)
    59  // or syntax error from data will be recorded and stop subsequently decoding.
    60  func (self *StreamDecoder) Decode(val interface{}) (err error) {
    61  	if self.err != nil {
    62  		return self.err
    63  	}
    64  
    65  	var buf = self.buf[self.scanp:]
    66  	var p = 0
    67  	var recycle bool
    68  	if cap(buf) == 0 {
    69  		buf = bufPool.Get().([]byte)
    70  		recycle = true
    71  	}
    72  
    73  	var first = true
    74  	var repeat = true
    75  
    76  read_more:
    77  	for {
    78  		l := len(buf)
    79  		realloc(&buf)
    80  		n, err := self.r.Read(buf[l:cap(buf)])
    81  		buf = buf[:l+n]
    82  		if err != nil {
    83  			repeat = false
    84  			if err == io.EOF {
    85  				if len(buf) == 0 {
    86  					return err
    87  				}
    88  				break
    89  			}
    90  			self.err = err
    91  			return err
    92  		}
    93  		if n > 0 || first {
    94  			break
    95  		}
    96  	}
    97  	first = false
    98  
    99  	l := len(buf)
   100  	if l > 0 {
   101  		self.Decoder.Reset(string(buf))
   102  
   103  		var x int
   104  		if ret := native.SkipOneFast(&self.s, &x); ret < 0 {
   105  			if repeat {
   106  				goto read_more
   107  			} else {
   108  				err = SyntaxError{x, self.s, types.ParsingError(-ret), ""}
   109  				self.err = err
   110  				return
   111  			}
   112  		}
   113  
   114  		err = self.Decoder.Decode(val)
   115  		if err != nil {
   116  			self.err = err
   117  		}
   118  
   119  		p = self.Decoder.Pos()
   120  		self.scanned += int64(p)
   121  		self.scanp = 0
   122  	}
   123  
   124  	if l > p {
   125  		// remain undecoded bytes, so copy them into self.buf
   126  		self.buf = append(self.buf[:0], buf[p:]...)
   127  	} else {
   128  		self.buf = nil
   129  		recycle = true
   130  	}
   131  
   132  	if recycle {
   133  		buf = buf[:0]
   134  		bufPool.Put(buf)
   135  	}
   136  	return err
   137  }
   138  
   139  func (self StreamDecoder) repeatable(err error) bool {
   140  	if ee, ok := err.(SyntaxError); ok &&
   141  		(ee.Code == types.ERR_EOF || (ee.Code == types.ERR_INVALID_CHAR && self.i >= len(self.s)-1)) {
   142  		return true
   143  	}
   144  	return false
   145  }
   146  
   147  // InputOffset returns the input stream byte offset of the current decoder position.
   148  // The offset gives the location of the end of the most recently returned token and the beginning of the next token.
   149  func (self *StreamDecoder) InputOffset() int64 {
   150  	return self.scanned + int64(self.scanp)
   151  }
   152  
   153  // Buffered returns a reader of the data remaining in the Decoder's buffer.
   154  // The reader is valid until the next call to Decode.
   155  func (self *StreamDecoder) Buffered() io.Reader {
   156  	return bytes.NewReader(self.buf[self.scanp:])
   157  }
   158  
   159  // More reports whether there is another element in the
   160  // current array or object being parsed.
   161  func (self *StreamDecoder) More() bool {
   162  	if self.err != nil {
   163  		return false
   164  	}
   165  	c, err := self.peek()
   166  	return err == nil && c != ']' && c != '}'
   167  }
   168  
   169  func (self *StreamDecoder) peek() (byte, error) {
   170  	var err error
   171  	for {
   172  		for i := self.scanp; i < len(self.buf); i++ {
   173  			c := self.buf[i]
   174  			if isSpace(c) {
   175  				continue
   176  			}
   177  			self.scanp = i
   178  			return c, nil
   179  		}
   180  		// buffer has been scanned, now report any error
   181  		if err != nil {
   182  			if err != io.EOF {
   183  				self.err = err
   184  			}
   185  			return 0, err
   186  		}
   187  		err = self.refill()
   188  	}
   189  }
   190  
   191  func isSpace(c byte) bool {
   192  	return types.SPACE_MASK&(1<<c) != 0
   193  }
   194  
   195  func (self *StreamDecoder) refill() error {
   196  	// Make room to read more into the buffer.
   197  	// First slide down data already consumed.
   198  	if self.scanp > 0 {
   199  		self.scanned += int64(self.scanp)
   200  		n := copy(self.buf, self.buf[self.scanp:])
   201  		self.buf = self.buf[:n]
   202  		self.scanp = 0
   203  	}
   204  
   205  	// Grow buffer if not large enough.
   206  	realloc(&self.buf)
   207  
   208  	// Read. Delay error for next iteration (after scan).
   209  	n, err := self.r.Read(self.buf[len(self.buf):cap(self.buf)])
   210  	self.buf = self.buf[0 : len(self.buf)+n]
   211  
   212  	return err
   213  }
   214  
   215  func realloc(buf *[]byte) {
   216  	l := uint(len(*buf))
   217  	c := uint(cap(*buf))
   218  	if c-l <= c>>minLeftBufferShift {
   219  		e := l + (l >> minLeftBufferShift)
   220  		if e < option.DefaultDecoderBufferSize {
   221  			e = option.DefaultDecoderBufferSize
   222  		}
   223  		tmp := make([]byte, l, e)
   224  		copy(tmp, *buf)
   225  		*buf = tmp
   226  	}
   227  }