github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/internal/decoder/stream.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package decoder
    18  
    19  import (
    20      `bytes`
    21      `io`
    22      `sync`
    23  
    24      `github.com/bytedance/sonic/internal/native`
    25      `github.com/bytedance/sonic/internal/native/types`
    26      `github.com/bytedance/sonic/internal/rt`
    27      `github.com/bytedance/sonic/option`
    28  )
    29  
    30  var (
    31      minLeftBufferShift uint = 1
    32  )
    33  
    34  // StreamDecoder is the decoder context object for streaming input.
    35  type StreamDecoder struct {
    36      r       io.Reader
    37      buf     []byte
    38      scanp   int
    39      scanned int64
    40      err     error
    41      Decoder
    42  }
    43  
    44  var bufPool = sync.Pool{
    45      New: func () interface{} {
    46          return make([]byte, 0, option.DefaultDecoderBufferSize)
    47      },
    48  }
    49  
    50  // NewStreamDecoder adapts to encoding/json.NewDecoder API.
    51  //
    52  // NewStreamDecoder returns a new decoder that reads from r.
    53  func NewStreamDecoder(r io.Reader) *StreamDecoder {
    54      return &StreamDecoder{r : r}
    55  }
    56  
    57  // Decode decodes input stream into val with corresponding data. 
    58  // Redundantly bytes may be read and left in its buffer, and can be used at next call.
    59  // Either io error from underlying io.Reader (except io.EOF) 
    60  // or syntax error from data will be recorded and stop subsequently decoding.
    61  func (self *StreamDecoder) Decode(val interface{}) (err error) {
    62      // read more data into buf
    63      if self.More() {
    64          // println(string(self.buf))
    65          var s = self.scanp
    66      try_skip:
    67          var e = len(self.buf)
    68          // println("s:", s, "e:", e, "scanned:",self.scanned, "scanp:",self.scanp, self.buf)
    69          var src = rt.Mem2Str(self.buf[s:e])
    70          // if len(src) > 5 {
    71          //     println(src[:5], src[len(src)-5:])
    72          // } else {
    73          //     println(src)
    74          // }
    75          // try skip
    76          var x = 0;
    77          if y := native.SkipOneFast(&src, &x); y < 0 {
    78              if self.readMore()  {
    79                  // println("more")
    80                  goto try_skip
    81              } else {
    82                  // println("no more")
    83                  err = SyntaxError{e, self.s, types.ParsingError(-s), ""}
    84                  self.setErr(err)
    85                  return
    86              }
    87          } else {
    88              s = y + s
    89              e = x + s
    90          }
    91          
    92          // println("decode: ", s, e)
    93          // must copy string here for safety
    94          self.Decoder.Reset(string(self.buf[s:e]))
    95          err = self.Decoder.Decode(val)
    96          if err != nil {
    97              self.setErr(err)
    98              return 
    99          }
   100  
   101          self.scanp = e
   102          _, empty := self.scan()
   103          if empty {
   104              // println("recycle")
   105              // no remain valid bytes, thus we just recycle buffer
   106              mem := self.buf
   107              self.buf = nil
   108              bufPool.Put(mem[:0])
   109          } else {
   110              // println("keep")
   111              // remain undecoded bytes, move them onto head
   112              n := copy(self.buf, self.buf[self.scanp:])
   113              self.buf = self.buf[:n]
   114          }   
   115  
   116          self.scanned += int64(self.scanp)
   117          self.scanp = 0
   118      }    
   119  
   120      return self.err
   121  }
   122  
   123  // InputOffset returns the input stream byte offset of the current decoder position. 
   124  // The offset gives the location of the end of the most recently returned token and the beginning of the next token.
   125  func (self *StreamDecoder) InputOffset() int64 {
   126      // println("input offset",self.scanned, self.scanp)
   127      return self.scanned + int64(self.scanp)
   128  }
   129  
   130  // Buffered returns a reader of the data remaining in the Decoder's buffer. 
   131  // The reader is valid until the next call to Decode.
   132  func (self *StreamDecoder) Buffered() io.Reader {
   133      return bytes.NewReader(self.buf[self.scanp:])
   134  }
   135  
   136  // More reports whether there is another element in the
   137  // current array or object being parsed.
   138  func (self *StreamDecoder) More() bool {
   139      if self.err != nil {
   140          return false
   141      }
   142      c, err := self.peek()
   143      return err == nil && c != ']' && c != '}'
   144  }
   145  
   146  // More reports whether there is another element in the
   147  // current array or object being parsed.
   148  func (self *StreamDecoder) readMore() bool {
   149      if self.err != nil {
   150          return false
   151      }
   152  
   153      var err error
   154      var n int
   155      for {
   156          // Grow buffer if not large enough.
   157          l := len(self.buf)
   158          realloc(&self.buf)
   159  
   160          n, err = self.r.Read(self.buf[l:cap(self.buf)])
   161          self.buf = self.buf[: l+n]
   162  
   163          self.scanp = l
   164          _, empty := self.scan()
   165          if !empty {
   166              return true
   167          }
   168  
   169          // buffer has been scanned, now report any error
   170          if err != nil  {
   171              self.setErr(err)
   172              return false
   173          }
   174      }
   175  }
   176  
   177  func (self *StreamDecoder) setErr(err error) {
   178      self.err = err
   179      mem := self.buf[:0]
   180      self.buf = nil
   181      bufPool.Put(mem)
   182  }
   183  
   184  func (self *StreamDecoder) peek() (byte, error) {
   185      var err error
   186      for {
   187          c, empty := self.scan()
   188          if !empty {
   189              return byte(c), nil
   190          }
   191          // buffer has been scanned, now report any error
   192          if err != nil {
   193              self.setErr(err)
   194              return 0, err
   195          }
   196          err = self.refill()
   197      }
   198  }
   199  
   200  func (self *StreamDecoder) scan() (byte, bool) {
   201      for i := self.scanp; i < len(self.buf); i++ {
   202          c := self.buf[i]
   203          if isSpace(c) {
   204              continue
   205          }
   206          self.scanp = i
   207          return c, false
   208      }
   209      return 0, true
   210  }
   211  
   212  func isSpace(c byte) bool {
   213      return types.SPACE_MASK & (1 << c) != 0
   214  }
   215  
   216  func (self *StreamDecoder) refill() error {
   217      // Make room to read more into the buffer.
   218      // First slide down data already consumed.
   219      if self.scanp > 0 {
   220          self.scanned += int64(self.scanp)
   221          n := copy(self.buf, self.buf[self.scanp:])
   222          self.buf = self.buf[:n]
   223          self.scanp = 0
   224      }
   225  
   226      // Grow buffer if not large enough.
   227      realloc(&self.buf)
   228  
   229      // Read. Delay error for next iteration (after scan).
   230      n, err := self.r.Read(self.buf[len(self.buf):cap(self.buf)])
   231      self.buf = self.buf[0 : len(self.buf)+n]
   232  
   233      return err
   234  }
   235  
   236  func realloc(buf *[]byte) bool {
   237      l := uint(len(*buf))
   238      c := uint(cap(*buf))
   239      if c == 0 {
   240          // println("use pool!")
   241         *buf = bufPool.Get().([]byte)
   242         return true
   243      }
   244      if c - l <= c >> minLeftBufferShift {
   245          // println("realloc!")
   246          e := l+(l>>minLeftBufferShift)
   247          if e <= c {
   248              e = c*2
   249          }
   250          tmp := make([]byte, l, e)
   251          copy(tmp, *buf)
   252          *buf = tmp
   253          return true
   254      }
   255      return false
   256  }
   257