github.com/zerosnake0/jzon@v0.0.9-0.20230801092939-1b135cb83f7f/iterator.go (about)

     1  package jzon
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"runtime"
     8  )
     9  
// bufferSize is the size in bytes of the scratch array that readMore reads
// into when capture mode is enabled.
const bufferSize = 512
    11  
// iteratorEmbedded groups the per-iteration state of an Iterator so that it
// can be cleared in a single step: the reset methods assign iteratorEmbedded{}
// to wipe every field at once (fast reset).
type iteratorEmbedded struct {
	/*
	 * The following attributes must be resettable to zero by memset
	 */

	// capture makes readMore append freshly read data to the buffer instead
	// of overwriting it.
	// offset accumulates the tail of every buffer that readMore replaces in
	// non-capture mode.
	capture bool
	offset  int

	// the current index position
	head int

	// eface checkpoint
	// NOTE(review): these two fields are not used in this part of the file;
	// their exact meaning should be confirmed against the code that sets them.
	lastEfaceOffset int
	lastEfacePtr    uintptr

	// TODO: 1. type of context?
	// TODO: 2. should context be reset as well?
	// As written, Context is cleared together with the other fields whenever
	// the embedded struct is reset.
	Context interface{} // custom iteration context
}
    31  
// Iterator is designed for one-shot use; one of the reset methods must be
// called before each reuse.
type Iterator struct {
	// cfg is the decoder configuration the iterator is returned to by Release.
	cfg *DecoderConfig

	// reader is the input source; it is nil when iterating over a byte slice,
	// in which case readMore reports io.EOF.
	reader io.Reader
	// buffer holds the data being parsed; Buffer exposes buffer[head:tail].
	buffer []byte
	// fixbuf is the iterator's own reusable buffer: Reset reslices it to its
	// full capacity as the read buffer, and readMore stores the (possibly
	// grown) capture buffer back into it.
	fixbuf []byte

	// a temp buffer is needed for string reading
	// which include utf8 conversion
	tmpBuffer []byte

	// the current tail position in buffer
	tail int

	iteratorEmbedded

	// Decoding options.
	// NOTE(review): not referenced in this part of the file; presumably
	// derived from the decoder configuration. Confirm where they are set.
	useNumber             bool
	disallowUnknownFields bool
}
    52  
    53  // NewIterator returns a new iterator.
    54  func NewIterator() *Iterator {
    55  	return DefaultDecoderConfig.NewIterator()
    56  }
    57  
    58  // Release the iterator, the iterator should not be reused after call.
    59  func (it *Iterator) Release() {
    60  	it.cfg.returnIterator(it)
    61  }
    62  
    63  func (it *Iterator) reset() {
    64  	it.reader = nil
    65  	it.buffer = nil
    66  	it.tail = 0
    67  
    68  	// fast reset
    69  	it.iteratorEmbedded = iteratorEmbedded{}
    70  }
    71  
    72  // Reset the iterator with an io.Reader
    73  // if the reader is nil, reset the iterator to its initial state
    74  //
    75  // In reset methods, explicit assignment is faster than then following
    76  //   *it = Iterator{ ... }
    77  // When the above code is used, runtime.duffcopy and runtime.duffzero will be used
    78  // which will slow down our code (correct me if I am wrong)
    79  func (it *Iterator) Reset(r io.Reader) {
    80  	switch v := r.(type) {
    81  	case nil:
    82  		it.reset()
    83  		return
    84  	case *bytes.Buffer:
    85  		it.ResetBytes(v.Bytes())
    86  		return
    87  	}
    88  	it.reader = r
    89  	it.buffer = it.fixbuf[:cap(it.fixbuf)]
    90  	it.tail = 0
    91  
    92  	// fast reset
    93  	it.iteratorEmbedded = iteratorEmbedded{}
    94  }
    95  
    96  // ResetBytes resets iterator with a byte slice
    97  func (it *Iterator) ResetBytes(data []byte) {
    98  	it.reader = nil
    99  	it.buffer = data
   100  	it.tail = len(data)
   101  
   102  	// fast reset
   103  	it.iteratorEmbedded = iteratorEmbedded{}
   104  }
   105  
   106  // Buffer returns the current slice buffer of the iterator.
   107  func (it *Iterator) Buffer() []byte {
   108  	return it.buffer[it.head:it.tail]
   109  }
   110  
   111  const errWidth = 20
   112  
   113  func (it *Iterator) errorLocation() []byte {
   114  	var (
   115  		head int
   116  		tail int
   117  	)
   118  	if it.head > errWidth {
   119  		head = it.head - errWidth
   120  	}
   121  	if it.tail-it.head < errWidth {
   122  		tail = it.tail
   123  	} else {
   124  		tail = it.head + errWidth
   125  	}
   126  	return it.buffer[head:tail]
   127  }
   128  
   129  // WrapError wraps the error with the current iterator location
   130  func (it *Iterator) WrapError(err error) *DecodeError {
   131  	if e, ok := err.(*DecodeError); ok {
   132  		return e
   133  	}
   134  	return &DecodeError{
   135  		reason:   err,
   136  		location: string(it.errorLocation()),
   137  	}
   138  }
   139  
   140  // make sure that it.head == it.tail before call
   141  // will set error
   142  func (it *Iterator) readMore() error {
   143  	if it.reader == nil {
   144  		return io.EOF
   145  	}
   146  	var (
   147  		n   int
   148  		err error
   149  	)
   150  	for {
   151  		if it.capture {
   152  			var buf [bufferSize]byte
   153  			n, err = it.reader.Read(buf[:])
   154  			it.buffer = append(it.buffer[:it.tail], buf[:n]...)
   155  			it.tail += n
   156  			// save internal buffer for reuse
   157  			it.fixbuf = it.buffer
   158  		} else {
   159  			if jzonDebug {
   160  				if it.head != it.tail {
   161  					panic(fmt.Errorf("head %d, tail %d", it.head, it.tail))
   162  				}
   163  			}
   164  			n, err = it.reader.Read(it.buffer)
   165  			it.offset += it.tail
   166  			it.head = 0
   167  			it.tail = n
   168  		}
   169  		if err != nil {
   170  			if err == io.EOF && n > 0 {
   171  				return nil
   172  			}
   173  			return err
   174  		}
   175  		if n > 0 {
   176  			return nil
   177  		}
   178  		// n == 0 && err == nil
   179  		// the implementation of the reader is wrong
   180  		runtime.Gosched()
   181  	}
   182  }
   183  
   184  // will NOT skip whitespaces
   185  // will NOT consume the character
   186  // will report error on EOF
   187  func (it *Iterator) nextByte() (ret byte, err error) {
   188  	if it.head == it.tail {
   189  		if err = it.readMore(); err != nil {
   190  			return
   191  		}
   192  	}
   193  	return it.buffer[it.head], nil
   194  }
   195  
   196  // will consume the characters
   197  func (it *Iterator) expectBytes(s string) error {
   198  	last := len(s) - 1
   199  	j := 0
   200  	for {
   201  		i := it.head
   202  		for ; i < it.tail; i++ {
   203  			c := it.buffer[i]
   204  			if c != s[j] {
   205  				return UnexpectedByteError{exp: s[j], got: c}
   206  			}
   207  			if j == last {
   208  				it.head = i + 1
   209  				return nil
   210  			}
   211  			j++
   212  		}
   213  		it.head = i
   214  		if err := it.readMore(); err != nil {
   215  			return err
   216  		}
   217  	}
   218  }
   219  
   220  // Read until the first valid token is found, only the whitespaces are consumed
   221  func (it *Iterator) nextToken() (ret byte, err error) {
   222  	for {
   223  		i := it.head
   224  		for ; i < it.tail; i++ {
   225  			c := it.buffer[i]
   226  			if c <= ' ' {
   227  				if valueTypeMap[c] == WhiteSpaceValue {
   228  					continue
   229  				}
   230  			}
   231  			it.head = i
   232  			return c, nil
   233  		}
   234  		// the head and tail will be reset by readMore
   235  		it.head = i
   236  		if err := it.readMore(); err != nil {
   237  			return 0, err
   238  		}
   239  	}
   240  }
   241  
   242  // NextValueType read until the first valid token is found, only the whitespaces are consumed
   243  func (it *Iterator) NextValueType() (ValueType, error) {
   244  	v, err := it.nextToken()
   245  	return valueTypeMap[v], err
   246  }
   247  
   248  func (it *Iterator) unmarshal(obj interface{}) error {
   249  	err := it.ReadVal(obj)
   250  	if err != nil {
   251  		return err
   252  	}
   253  	_, err = it.nextToken()
   254  	if err == nil {
   255  		return ErrDataRemained
   256  	}
   257  	if err != io.EOF {
   258  		return err
   259  	}
   260  	return nil
   261  }
   262  
   263  // Unmarshal behave like standard json.Unmarshal
   264  func (it *Iterator) Unmarshal(data []byte, obj interface{}) error {
   265  	it.ResetBytes(data)
   266  	return it.unmarshal(obj)
   267  }
   268  
   269  // Valid behave like standard json.Valid
   270  func (it *Iterator) Valid(data []byte) bool {
   271  	it.ResetBytes(data)
   272  	err := it.Skip()
   273  	if err != nil {
   274  		return false
   275  	}
   276  	_, err = it.nextToken()
   277  	return err == io.EOF
   278  }
   279  
   280  // UnmarshalFromReader behave like standard json.Unmarshal but with an io.Reader
   281  func (it *Iterator) UnmarshalFromReader(r io.Reader, obj interface{}) error {
   282  	it.Reset(r)
   283  	return it.unmarshal(obj)
   284  }