github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/jsoni/iter.go (about)

     1  package jsoni
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  )
     9  
    10  // ValueType the type for JSON element
    11  type ValueType int
    12  
    13  const (
    14  	// InvalidValue invalid JSON element
    15  	InvalidValue ValueType = iota
    16  	// StringValue JSON element "string"
    17  	StringValue
    18  	// NumberValue JSON element 100 or 0.10
    19  	NumberValue
    20  	// NilValue JSON element null
    21  	NilValue
    22  	// BoolValue JSON element true or false
    23  	BoolValue
    24  	// ArrayValue JSON element []
    25  	ArrayValue
    26  	// ObjectValue JSON element {}
    27  	ObjectValue
    28  )
    29  
    30  var (
    31  	hexDigits  []byte
    32  	valueTypes []ValueType
    33  )
    34  
    35  func init() {
    36  	hexDigits = make([]byte, 256)
    37  	for i := 0; i < len(hexDigits); i++ {
    38  		hexDigits[i] = 255
    39  	}
    40  	for i := '0'; i <= '9'; i++ {
    41  		hexDigits[i] = byte(i - '0')
    42  	}
    43  	for i := 'a'; i <= 'f'; i++ {
    44  		hexDigits[i] = byte((i - 'a') + 10)
    45  	}
    46  	for i := 'A'; i <= 'F'; i++ {
    47  		hexDigits[i] = byte((i - 'A') + 10)
    48  	}
    49  	valueTypes = make([]ValueType, 256)
    50  	for i := 0; i < len(valueTypes); i++ {
    51  		valueTypes[i] = InvalidValue
    52  	}
    53  	valueTypes['"'] = StringValue
    54  	valueTypes['-'] = NumberValue
    55  	for c := '0'; c <= '9'; c++ {
    56  		valueTypes[c] = NumberValue
    57  	}
    58  	valueTypes['t'] = BoolValue
    59  	valueTypes['f'] = BoolValue
    60  	valueTypes['n'] = NilValue
    61  	valueTypes['['] = ArrayValue
    62  	valueTypes['{'] = ObjectValue
    63  }
    64  
// Iterator is a io.Reader like object, with JSON specific read functions.
// Error is not returned as return value, but stored as Error member on this iterator instance.
type Iterator struct {
	// cfg is the frozen configuration the iterator was created with; Pool returns it.
	cfg              *frozenConfig
	// reader supplies more input when buf is exhausted; nil when parsing an
	// in-memory byte slice, in which case running out of data yields io.EOF.
	reader           io.Reader
	// buf holds the bytes currently available for parsing.
	buf              []byte
	// head is the index in buf of the next byte to be read.
	head             int
	// tail is the index one past the last valid byte in buf.
	tail             int
	// depth is the current nesting level, adjusted by incrementDepth/decrementDepth.
	depth            int
	// captureStartedAt is the offset in buf from which bytes are copied into
	// captured when the buffer is refilled; loadMore resets it to 0 afterwards.
	captureStartedAt int
	// captured, when non-nil, accumulates consumed input across buffer refills.
	captured         []byte
	// Error records the failure reported while iterating; io.EOF signals end of input.
	Error            error
	Attachment       interface{} // open for customized decoder
}
    79  
    80  // NewIterator creates an empty Iterator instance
    81  func NewIterator(cfg API) *Iterator {
    82  	return &Iterator{
    83  		cfg:    cfg.(*frozenConfig),
    84  		reader: nil,
    85  		buf:    nil,
    86  		head:   0,
    87  		tail:   0,
    88  		depth:  0,
    89  	}
    90  }
    91  
    92  // Parse creates an Iterator instance from io.Reader
    93  func Parse(cfg API, reader io.Reader, bufSize int) *Iterator {
    94  	return &Iterator{
    95  		cfg:    cfg.(*frozenConfig),
    96  		reader: reader,
    97  		buf:    make([]byte, bufSize),
    98  		head:   0,
    99  		tail:   0,
   100  		depth:  0,
   101  	}
   102  }
   103  
   104  // ParseBytes creates an Iterator instance from byte array
   105  func ParseBytes(cfg API, input []byte) *Iterator {
   106  	return &Iterator{
   107  		cfg:    cfg.(*frozenConfig),
   108  		reader: nil,
   109  		buf:    input,
   110  		head:   0,
   111  		tail:   len(input),
   112  		depth:  0,
   113  	}
   114  }
   115  
   116  // ParseString creates an Iterator instance from string
   117  func ParseString(cfg API, input string) *Iterator {
   118  	return ParseBytes(cfg, []byte(input))
   119  }
   120  
   121  // Pool returns a pool can provide more iterator with same configuration
   122  func (iter *Iterator) Pool() IteratorPool {
   123  	return iter.cfg
   124  }
   125  
   126  // Reset reuse iterator instance by specifying another reader
   127  func (iter *Iterator) Reset(reader io.Reader) *Iterator {
   128  	iter.reader = reader
   129  	iter.head = 0
   130  	iter.tail = 0
   131  	iter.depth = 0
   132  	return iter
   133  }
   134  
   135  // ResetBytes reuse iterator instance by specifying another byte array as input
   136  func (iter *Iterator) ResetBytes(input []byte) *Iterator {
   137  	iter.reader = nil
   138  	iter.buf = input
   139  	iter.head = 0
   140  	iter.tail = len(input)
   141  	iter.depth = 0
   142  	return iter
   143  }
   144  
   145  // WhatIsNext gets ValueType of relatively next json element
   146  func (iter *Iterator) WhatIsNext() ValueType {
   147  	valueType := valueTypes[iter.nextToken()]
   148  	iter.unreadByte()
   149  	return valueType
   150  }
   151  
   152  func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
   153  	for i := iter.head; i < iter.tail; i++ {
   154  		c := iter.buf[i]
   155  		switch c {
   156  		case ' ', '\n', '\t', '\r':
   157  			continue
   158  		}
   159  		iter.head = i
   160  		return false
   161  	}
   162  	return true
   163  }
   164  
   165  func (iter *Iterator) isObjectEnd() bool {
   166  	c := iter.nextToken()
   167  	if c == ',' {
   168  		return false
   169  	}
   170  	if c == '}' {
   171  		return true
   172  	}
   173  	iter.ReportError("isObjectEnd", "object ended prematurely, unexpected char "+string([]byte{c}))
   174  	return true
   175  }
   176  
   177  func (iter *Iterator) nextToken() byte {
   178  	// a variation of skip whitespaces, returning the next non-whitespace token
   179  	for {
   180  		for i := iter.head; i < iter.tail; i++ {
   181  			c := iter.buf[i]
   182  			switch c {
   183  			case ' ', '\n', '\t', '\r':
   184  				continue
   185  			}
   186  			iter.head = i + 1
   187  			return c
   188  		}
   189  		if !iter.loadMore() {
   190  			return 0
   191  		}
   192  	}
   193  }
   194  
   195  // ReportError record a error in iterator instance with current position.
   196  func (iter *Iterator) ReportError(operation string, msg string) {
   197  	if iter.Error != nil {
   198  		if iter.Error != io.EOF {
   199  			return
   200  		}
   201  	}
   202  	peekStart := iter.head - 10
   203  	if peekStart < 0 {
   204  		peekStart = 0
   205  	}
   206  	peekEnd := iter.head + 10
   207  	if peekEnd > iter.tail {
   208  		peekEnd = iter.tail
   209  	}
   210  	parsing := string(iter.buf[peekStart:peekEnd])
   211  	contextStart := iter.head - 50
   212  	if contextStart < 0 {
   213  		contextStart = 0
   214  	}
   215  	contextEnd := iter.head + 50
   216  	if contextEnd > iter.tail {
   217  		contextEnd = iter.tail
   218  	}
   219  	context := string(iter.buf[contextStart:contextEnd])
   220  	iter.Error = fmt.Errorf("%s: %s, error found in #%v byte of ...|%s|..., bigger context ...|%s|...",
   221  		operation, msg, iter.head-peekStart, parsing, context)
   222  }
   223  
   224  // CurrentBuffer gets current buffer as string for debugging purpose
   225  func (iter *Iterator) CurrentBuffer() string {
   226  	peekStart := iter.head - 10
   227  	if peekStart < 0 {
   228  		peekStart = 0
   229  	}
   230  	return fmt.Sprintf("parsing #%v byte, around ...|%s|..., whole buffer ...|%s|...", iter.head,
   231  		string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
   232  }
   233  
   234  func (iter *Iterator) readByte() (ret byte) {
   235  	if iter.head == iter.tail {
   236  		if iter.loadMore() {
   237  			ret = iter.buf[iter.head]
   238  			iter.head++
   239  			return ret
   240  		}
   241  		return 0
   242  	}
   243  	ret = iter.buf[iter.head]
   244  	iter.head++
   245  	return ret
   246  }
   247  
   248  func (iter *Iterator) loadMore() bool {
   249  	if iter.reader == nil {
   250  		if iter.Error == nil {
   251  			iter.head = iter.tail
   252  			iter.Error = io.EOF
   253  		}
   254  		return false
   255  	}
   256  	if iter.captured != nil {
   257  		iter.captured = append(iter.captured,
   258  			iter.buf[iter.captureStartedAt:iter.tail]...)
   259  		iter.captureStartedAt = 0
   260  	}
   261  	for {
   262  		n, err := iter.reader.Read(iter.buf)
   263  		if n == 0 {
   264  			if err != nil {
   265  				if iter.Error == nil {
   266  					iter.Error = err
   267  				}
   268  				return false
   269  			}
   270  		} else {
   271  			iter.head = 0
   272  			iter.tail = n
   273  			return true
   274  		}
   275  	}
   276  }
   277  
   278  func (iter *Iterator) unreadByte() {
   279  	if iter.Error != nil {
   280  		return
   281  	}
   282  	iter.head--
   283  	return
   284  }
   285  
   286  // Read reads the next JSON element as generic interface{}.
   287  func (iter *Iterator) Read(ctx context.Context) interface{} {
   288  	valueType := iter.WhatIsNext()
   289  	switch valueType {
   290  	case StringValue:
   291  		return iter.ReadString()
   292  	case NumberValue:
   293  		if iter.cfg.configBeforeFrozen.UseNumber {
   294  			return json.Number(iter.readNumberAsString())
   295  		}
   296  		return iter.ReadFloat64()
   297  	case NilValue:
   298  		iter.skip4Bytes('n', 'u', 'l', 'l')
   299  		return nil
   300  	case BoolValue:
   301  		return iter.ReadBool()
   302  	case ArrayValue:
   303  		var arr []interface{}
   304  		iter.ReadArrayCB(func(iter *Iterator) bool {
   305  			var elem interface{}
   306  			iter.ReadVal(ctx, &elem)
   307  			arr = append(arr, elem)
   308  			return true
   309  		})
   310  		return arr
   311  	case ObjectValue:
   312  		obj := map[string]interface{}{}
   313  		iter.ReadMapCB(func(Iter *Iterator, field string) bool {
   314  			var elem interface{}
   315  			iter.ReadVal(ctx, &elem)
   316  			obj[field] = elem
   317  			return true
   318  		})
   319  		return obj
   320  	default:
   321  		iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
   322  		return nil
   323  	}
   324  }
   325  
   326  // limit maximum depth of nesting, as allowed by https://tools.ietf.org/html/rfc7159#section-9
   327  const maxDepth = 10000
   328  
   329  func (iter *Iterator) incrementDepth() (success bool) {
   330  	iter.depth++
   331  	if iter.depth <= maxDepth {
   332  		return true
   333  	}
   334  	iter.ReportError("incrementDepth", "exceeded max depth")
   335  	return false
   336  }
   337  
   338  func (iter *Iterator) decrementDepth() (success bool) {
   339  	iter.depth--
   340  	if iter.depth >= 0 {
   341  		return true
   342  	}
   343  	iter.ReportError("decrementDepth", "unexpected negative nesting")
   344  	return false
   345  }