github.com/neilotoole/jsoncolor@v0.7.2-0.20231115150201-1637fae69be1/token.go

package jsoncolor

// Tokenizer is an iterator-style type which can be used to progressively parse
// through a json input.
//
// Tokenizing json is useful for building highly efficient parsing operations,
// for example when doing transformations on the fly, where the program reads
// the input and writes the transformed json to an output buffer as it goes.
//
// Here is a common pattern to use a tokenizer:
//
//	for t := json.NewTokenizer(b); t.Next(); {
//		switch t.Delim {
//		case '{':
//			...
//		case '}':
//			...
//		case '[':
//			...
//		case ']':
//			...
//		case ':':
//			...
//		case ',':
//			...
//		}
//
//		switch {
//		case t.Value.String():
//			...
//		case t.Value.Null():
//			...
//		case t.Value.True():
//			...
//		case t.Value.False():
//			...
//		case t.Value.Number():
//			...
//		}
//	}
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys. The
	// buffer is used as pre-allocated space for the stack so that tokenizing
	// shallowly nested documents does not require a heap allocation.
	stack  []state
	buffer [8]state
}

// state records one level of nesting: the type of scope that was entered and a
// counter from which the index of the current value within that scope is derived.
type state struct {
	typ scope
	len int
}

// scope identifies the kind of json container the tokenizer has entered.
type scope int

const (
	inArray scope = iota
	inObject
)

// NewTokenizer constructs a new Tokenizer which reads its json input from b.
func NewTokenizer(b []byte) *Tokenizer { return &Tokenizer{json: b} }

// Reset erases the state of t and re-initializes it with the json input from b.
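//
// A minimal reuse sketch (b1 and b2 are assumed to be caller-provided json buffers):
//
//	t := NewTokenizer(b1)
//	for t.Next() {
//		// consume tokens from b1
//	}
//	t.Reset(b2) // reuse t for a second input without allocating a new Tokenizer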
func (t *Tokenizer) Reset(b []byte) {
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy, which
	// ends up being slower and prevents the code from being inlined.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	t.stack = nil
}

// Next advances the tokenizer to the next token, returning true if a token was
// read and false if the end of the json input has been reached.
//
// If the tokenizer encounters malformed json while reading the input the method
// sets t.Err to an error describing the issue, and returns false. Once an error
// has been encountered, the tokenizer will always fail until its input is
// cleared by a call to its Reset method.
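//
// A sketch of position tracking and error handling (the input literal is
// illustrative only):
//
//	t := NewTokenizer([]byte(`{"a":[1,2]}`))
//	for t.Next() {
//		if t.IsKey {
//			// t.Value holds the quoted object key, t.Depth its nesting level.
//		}
//		_ = t.Index // position of the value within its enclosing array or object
//	}
//	if t.Err != nil {
//		// the input was malformed; t.Err describes the problem
//	}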
func (t *Tokenizer) Next() bool {
	if t.Err != nil {
		return false
	}

	// Inlined code of the skipSpaces function; this gives a ~15% speed boost.
	i := 0
skipLoop:
	for _, c := range t.json {
		switch c {
		case sp, ht, nl, cr:
			i++
		default:
			break skipLoop
		}
	}

	if t.json = t.json[i:]; len(t.json) == 0 {
		t.Reset(nil)
		return false
	}

	var d Delim
	var v []byte
	var b []byte
	var err error

	switch t.json[0] {
	case '"':
		v, b, err = parseString(t.json)
	case 'n':
		v, b, err = parseNull(t.json)
	case 't':
		v, b, err = parseTrue(t.json)
	case 'f':
		v, b, err = parseFalse(t.json)
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		v, b, err = parseNumber(t.json)
	case '{', '}', '[', ']', ':', ',':
		d, v, b = Delim(t.json[0]), t.json[:1], t.json[1:]
	default:
		v, b, err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
	}

	t.Delim = d
	t.Value = RawValue(v)
	t.Err = err
	t.Depth = t.depth()
	t.Index = t.index()
	t.IsKey = d == 0 && t.isKey
	t.json = b

	if d != 0 {
		switch d {
		case '{':
			t.isKey = true
			t.push(inObject)
		case '[':
			t.push(inArray)
		case '}':
			err = t.pop(inObject)
			t.Depth--
			t.Index = t.index()
		case ']':
			err = t.pop(inArray)
			t.Depth--
			t.Index = t.index()
		case ':':
			t.isKey = false
		case ',':
			if t.is(inObject) {
				t.isKey = true
			}
			t.stack[len(t.stack)-1].len++
		}
	}

	return (d != 0 || len(v) != 0) && err == nil
}

// push records entry into a new array or object scope, lazily initializing the
// stack from the fixed-size buffer so that shallowly nested documents do not
// require a heap allocation.
func (t *Tokenizer) push(typ scope) {
	if t.stack == nil {
		t.stack = t.buffer[:0]
	}
	t.stack = append(t.stack, state{typ: typ, len: 1})
}

// pop records leaving the current scope, reporting a syntax error if the
// closing delimiter does not match the scope that was entered.
func (t *Tokenizer) pop(expect scope) error {
	i := len(t.stack) - 1

	if i < 0 {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}

	if found := t.stack[i]; expect != found.typ {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}

	t.stack = t.stack[:i]
	return nil
}

// is reports whether the tokenizer is currently inside a scope of the given type.
func (t *Tokenizer) is(typ scope) bool {
	return len(t.stack) != 0 && t.stack[len(t.stack)-1].typ == typ
}

// depth returns the current nesting depth: the number of scopes on the stack.
func (t *Tokenizer) depth() int {
	return len(t.stack)
}

// index returns the zero-based position of the current value within its
// enclosing array or object, or zero at the top level.
func (t *Tokenizer) index() int {
	if len(t.stack) == 0 {
		return 0
	}
	return t.stack[len(t.stack)-1].len - 1
}

// RawValue represents a raw json value; it is intended to carry null, true,
// false, number, and string values only.
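//
// A minimal classification sketch (v is assumed to hold a single raw json token
// produced by a Tokenizer):
//
//	switch {
//	case v.String():
//		s := v.Unquote()
//		_ = s
//	case v.Number():
//		// parse with strconv.ParseFloat or similar
//	case v.Null(), v.True(), v.False():
//		// literals need no further parsing
//	}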
type RawValue []byte

// String returns true if v contains a string value.
func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' }

// Null returns true if v contains a null value.
func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' }

// True returns true if v contains a true value.
func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' }

// False returns true if v contains a false value.
func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' }

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	if len(v) != 0 {
		switch v[0] {
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return true
		}
	}
	return false
}

// AppendUnquote appends the unquoted version of the string value in v to b and
// returns the resulting slice. It panics if the value cannot be unquoted.
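//
// A buffer-reuse sketch (buf is an assumed caller-owned scratch slice):
//
//	buf = buf[:0]
//	buf = v.AppendUnquote(buf)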
func (v RawValue) AppendUnquote(b []byte) []byte {
	s, r, isNew, err := parseStringUnquote([]byte(v), b)
	if err != nil {
		panic(err)
	}
	if len(r) != 0 {
		panic(syntaxError(r, "unexpected trailing tokens after json value"))
	}
	if isNew {
		b = s
	} else {
		b = append(b, s...)
	}
	return b
}

// Unquote returns the unquoted version of the string value in v.
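//
// A sketch of extracting object keys with a Tokenizer (the input literal is
// illustrative only):
//
//	t := NewTokenizer([]byte(`{"name":"gopher","age":10}`))
//	for t.Next() {
//		if t.IsKey {
//			key := t.Value.Unquote()
//			_ = key
//		}
//	}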
func (v RawValue) Unquote() []byte {
	return v.AppendUnquote(nil)
}