github.com/segmentio/encoding@v0.3.6/json/token.go

package json

import (
	"strconv"
	"sync"
	"unsafe"
)

// Tokenizer is an iterator-style type which can be used to progressively parse
// through a json input.
//
// Tokenizing json is useful for building highly efficient parsing operations,
// for example when doing transformations on-the-fly as the program reads the
// input and produces the transformed json to an output buffer.
//
// Here is a common pattern to use a tokenizer:
//
//	for t := json.NewTokenizer(b); t.Next(); {
//		switch k := t.Kind(); k.Class() {
//		case json.Null:
//			...
//		case json.Bool:
//			...
//		case json.Num:
//			...
//		case json.String:
//			...
//		case json.Array:
//			...
//		case json.Object:
//			...
//		}
//	}
//
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys.
	stack *stack

	// Decoder used for parsing.
	decoder
}

// NewTokenizer constructs a new Tokenizer which reads its json input from b.
func NewTokenizer(b []byte) *Tokenizer {
	return &Tokenizer{
		json:    b,
		decoder: decoder{flags: internalParseFlags(b)},
	}
}
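
// As a sketch of the pattern documented on Tokenizer above, a caller can walk
// an input and collect the keys of an object; the input and variable names
// below are illustrative only:
//
//	b := []byte(`{"name":"json","stars":10}`) // illustrative input
//	keys := []string{}
//	for t := json.NewTokenizer(b); t.Next(); {
//		if t.IsKey && t.Kind().Class() == json.String {
//			keys = append(keys, string(t.String()))
//		}
//	}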

// Reset erases the state of t and re-initializes it with the json input from b.
func (t *Tokenizer) Reset(b []byte) {
	if t.stack != nil {
		releaseStack(t.stack)
	}
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	t.stack = nil
	t.decoder = decoder{flags: internalParseFlags(b)}
}
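
// A minimal sketch of reusing one Tokenizer across several inputs by calling
// Reset between them, which lets the internal stack be released back to its
// pool; input1 and input2 below are illustrative only:
//
//	t := json.NewTokenizer(nil)
//	for _, b := range [][]byte{input1, input2} { // arbitrary json documents
//		t.Reset(b)
//		for t.Next() {
//			// consume t.Kind(), t.Delim, t.Value, ...
//		}
//	}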

// Next advances the tokenizer to the next token in its input and returns true,
// or returns false once the end of the json input has been reached.
//
// If the tokenizer encounters malformed json while reading the input the method
// sets t.Err to an error describing the issue, and returns false. Once an error
// has been encountered, the tokenizer will always fail until its input is
// cleared by a call to its Reset method.
func (t *Tokenizer) Next() bool {
	if t.Err != nil {
		return false
	}

	// Inlined code of the skipSpaces function; this gives a ~15% speed boost.
	i := 0
skipLoop:
	for _, c := range t.json {
		switch c {
		case sp, ht, nl, cr:
			i++
		default:
			break skipLoop
		}
	}

	if i > 0 {
		t.json = t.json[i:]
	}

	if len(t.json) == 0 {
		t.Reset(nil)
		return false
	}

	var kind Kind
	switch t.json[0] {
	case '"':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseString(t.json)
	case 'n':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseNull(t.json)
	case 't':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseTrue(t.json)
	case 'f':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseFalse(t.json)
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseNumber(t.json)
	case '{', '}', '[', ']', ':', ',':
		t.Delim, t.Value, t.json = Delim(t.json[0]), t.json[:1], t.json[1:]
		switch t.Delim {
		case '{':
			kind = Object
		case '[':
			kind = Array
		}
	default:
		t.Delim = 0
		t.Value, t.json, t.Err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
	}

	t.Depth = t.depth()
	t.Index = t.index()
	t.flags = t.flags.withKind(kind)

	if t.Delim == 0 {
		t.IsKey = t.isKey
	} else {
		t.IsKey = false

		switch t.Delim {
		case '{':
			t.isKey = true
			t.push(inObject)
		case '[':
			t.push(inArray)
		case '}':
			t.Err = t.pop(inObject)
			t.Depth--
			t.Index = t.index()
		case ']':
			t.Err = t.pop(inArray)
			t.Depth--
			t.Index = t.index()
		case ':':
			t.isKey = false
		case ',':
			if t.stack == nil || len(t.stack.state) == 0 {
				t.Err = syntaxError(t.json, "found unexpected comma")
				return false
			}
			if t.stack.is(inObject) {
				t.isKey = true
			}
			t.stack.state[len(t.stack.state)-1].len++
		}
	}

	return (t.Delim != 0 || len(t.Value) != 0) && t.Err == nil
}
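
// Since Next returns false both at the end of the input and when a syntax
// error is found, a typical scan checks t.Err once the loop exits; a sketch,
// where the input b is illustrative only:
//
//	t := json.NewTokenizer(b) // b is an arbitrary json input
//	for t.Next() {
//		// inspect t.Kind(), t.Delim, t.Value, t.Depth, t.Index, ...
//	}
//	if t.Err != nil {
//		// the input was malformed json
//	}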

func (t *Tokenizer) depth() int {
	if t.stack == nil {
		return 0
	}
	return t.stack.depth()
}

func (t *Tokenizer) index() int {
	if t.stack == nil {
		return 0
	}
	return t.stack.index()
}

func (t *Tokenizer) push(typ scope) {
	if t.stack == nil {
		t.stack = acquireStack()
	}
	t.stack.push(typ)
}

func (t *Tokenizer) pop(expect scope) error {
	if t.stack == nil || !t.stack.pop(expect) {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}
	return nil
}

// Kind returns the kind of the value that the tokenizer is currently positioned
// on.
func (t *Tokenizer) Kind() Kind { return t.flags.kind() }

// Bool returns a bool containing the value of the json boolean that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a boolean, the behavior is undefined.
func (t *Tokenizer) Bool() bool { return t.flags.kind() == True }

// Int returns the value of the json number that the tokenizer is currently
// pointing at, parsed as an int64.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on an integer, the behavior is undefined.
func (t *Tokenizer) Int() int64 {
	i, _, _ := t.parseInt(t.Value, int64Type)
	return i
}

// Uint returns the value of the json number that the tokenizer is currently
// pointing at, parsed as a uint64.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a positive integer, the behavior is
// undefined.
func (t *Tokenizer) Uint() uint64 {
	u, _, _ := t.parseUint(t.Value, uint64Type)
	return u
}

// Float returns the value of the json number that the tokenizer is currently
// pointing at, parsed as a float64.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a number, the behavior is undefined.
func (t *Tokenizer) Float() float64 {
	f, _ := strconv.ParseFloat(*(*string)(unsafe.Pointer(&t.Value)), 64)
	return f
}
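
// A sketch of reading numeric tokens: after confirming via Kind that the
// token's class is json.Num, Float can always be used, while Int and Uint
// assume the caller already knows the number is an integer; the names below
// are illustrative only:
//
//	var sum float64
//	for t := json.NewTokenizer(b); t.Next(); { // b is an arbitrary json input
//		if t.Kind().Class() == json.Num {
//			sum += t.Float()
//		}
//	}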

// String returns a byte slice containing the value of the json string that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// When possible, the returned byte slice references the backing array of the
// tokenizer. A new slice is only allocated if the tokenizer needed to unescape
// the json string.
//
// If the tokenizer is not positioned on a string, the behavior is undefined.
func (t *Tokenizer) String() []byte {
	if t.flags.kind() == Unescaped && len(t.Value) > 1 {
		return t.Value[1 : len(t.Value)-1] // unquote
	}
	s, _, _, _ := t.parseStringUnquote(t.Value, nil)
	return s
}
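
// Because the byte slice returned by String may reference the tokenizer's
// backing array, a caller that retains the value beyond the current token may
// want to copy it, for example by converting it to a string; a sketch, with
// names being illustrative only:
//
//	var names []string
//	for t := json.NewTokenizer(b); t.Next(); { // b is an arbitrary json input
//		if t.Kind().Class() == json.String {
//			names = append(names, string(t.String())) // copies out of the input buffer
//		}
//	}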

// Remaining returns the number of bytes left to parse.
func (t *Tokenizer) Remaining() int {
	return len(t.json)
}

// RawValue represents a raw json value; it is intended to carry null, true,
// false, number, and string values only.
type RawValue []byte

// String returns true if v contains a string value.
func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' }

// Null returns true if v contains a null value.
func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' }

// True returns true if v contains a true value.
func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' }

// False returns true if v contains a false value.
func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' }

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	if len(v) != 0 {
		switch v[0] {
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return true
		}
	}
	return false
}

// AppendUnquote writes the unquoted version of the string value in v into b.
func (v RawValue) AppendUnquote(b []byte) []byte {
	d := decoder{}
	s, r, _, err := d.parseStringUnquote(v, b)
	if err != nil {
		panic(err)
	}
	if len(r) != 0 {
		panic(syntaxError(r, "unexpected trailing tokens after json value"))
	}
	return append(b, s...)
}

// Unquote returns the unquoted version of the string value in v.
func (v RawValue) Unquote() []byte {
	return v.AppendUnquote(nil)
}
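
// A sketch of using RawValue outside of a Tokenizer loop: the predicates above
// classify a value by its first byte, and AppendUnquote panics when the value
// is not a valid json string, so it should only be applied to values for which
// String reports true; the names here are illustrative only:
//
//	var buf []byte
//	v := json.RawValue(`"hello\nworld"`) // illustrative raw value
//	if v.String() {
//		buf = v.AppendUnquote(buf[:0]) // buf now holds the unescaped text
//	}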

type scope int

const (
	inArray scope = iota
	inObject
)

type state struct {
	typ scope
	len int
}

type stack struct {
	state []state
}

func (s *stack) push(typ scope) {
	s.state = append(s.state, state{typ: typ, len: 1})
}

func (s *stack) pop(expect scope) bool {
	i := len(s.state) - 1

	if i < 0 {
		return false
	}

	if found := s.state[i]; expect != found.typ {
		return false
	}

	s.state = s.state[:i]
	return true
}

func (s *stack) is(typ scope) bool {
	return len(s.state) != 0 && s.state[len(s.state)-1].typ == typ
}

func (s *stack) depth() int {
	return len(s.state)
}

func (s *stack) index() int {
	if len(s.state) == 0 {
		return 0
	}
	return s.state[len(s.state)-1].len - 1
}

func acquireStack() *stack {
	s, _ := stackPool.Get().(*stack)
	if s == nil {
		s = &stack{state: make([]state, 0, 4)}
	} else {
		s.state = s.state[:0]
	}
	return s
}

func releaseStack(s *stack) {
	stackPool.Put(s)
}

var (
	stackPool sync.Pool // *stack
)