// github.com/neilotoole/jsoncolor@v0.6.0/token.go

package jsoncolor

// Tokenizer is an iterator-style type which can be used to progressively parse
// through a json input.
//
// Tokenizing json is useful for building highly efficient parsing operations,
// for example when doing transformations on-the-fly as the program reads the
// input and writes the transformed json to an output buffer.
//
// Here is a common pattern for using a tokenizer:
//
//	for t := json.NewTokenizer(b); t.Next(); {
//		switch t.Delim {
//		case '{':
//			...
//		case '}':
//			...
//		case '[':
//			...
//		case ']':
//			...
//		case ':':
//			...
//		case ',':
//			...
//		}
//
//		switch {
//		case t.Value.String():
//			...
//		case t.Value.Null():
//			...
//		case t.Value.True():
//			...
//		case t.Value.False():
//			...
//		case t.Value.Number():
//			...
//		}
//	}
//
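// The Depth, Index, and IsKey fields report where each token sits within the
// document. A minimal sketch that prints every object key, assuming the
// package is imported as json and b is a []byte holding the input:
//
//	t := json.NewTokenizer(b)
//	for t.Next() {
//		if t.IsKey {
//			fmt.Printf("depth=%d index=%d key=%s\n", t.Depth, t.Index, t.Value)
//		}
//	}
//	if t.Err != nil {
//		// b contained malformed json
//	}
//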
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys. The
	// buffer provides pre-allocated space for the stack so that tokenizing
	// shallow documents does not require a heap allocation.
	stack  []state
	buffer [8]state
}

type state struct {
	typ scope
	len int
}

type scope int

const (
	inArray scope = iota
	inObject
)

// NewTokenizer constructs a new Tokenizer which reads its json input from b.
func NewTokenizer(b []byte) *Tokenizer { return &Tokenizer{json: b} }

// Reset erases the state of t and re-initializes it with the json input from b.
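//
// Reset makes it possible to reuse a single Tokenizer value across many inputs
// instead of constructing a new one for each. A minimal sketch, assuming the
// package is imported as json and inputs is a hypothetical [][]byte slice:
//
//	t := json.NewTokenizer(nil)
//	for _, b := range inputs {
//		t.Reset(b)
//		for t.Next() {
//			// consume tokens
//		}
//	}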
func (t *Tokenizer) Reset(b []byte) {
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy, which
	// ends up being slower and prevents the code from being inlined.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	t.stack = nil
}

// Next advances the tokenizer to the next token and reports true, or reports
// false once the end of the json input has been reached.
//
// If the tokenizer encounters malformed json while reading the input the method
// sets t.Err to an error describing the issue, and returns false. Once an error
// has been encountered, the tokenizer will always fail until its input is
// cleared by a call to its Reset method.
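//
// Because Next reports false both at the end of the input and on malformed
// input, t.Err should be checked once the loop stops. A minimal sketch,
// assuming the package is imported as json:
//
//	t := json.NewTokenizer(b)
//	for t.Next() {
//		// consume t.Delim or t.Value
//	}
//	if t.Err != nil {
//		// b contained malformed json
//	}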
func (t *Tokenizer) Next() bool {
	if t.Err != nil {
		return false
	}

	// Inlined code of the skipSpaces function; this gives a ~15% speed boost.
	i := 0
skipLoop:
	for _, c := range t.json {
		switch c {
		case sp, ht, nl, cr:
			i++
		default:
			break skipLoop
		}
	}

	if t.json = t.json[i:]; len(t.json) == 0 {
		t.Reset(nil)
		return false
	}

	var d Delim
	var v []byte
	var b []byte
	var err error

	switch t.json[0] {
	case '"':
		v, b, err = parseString(t.json)
	case 'n':
		v, b, err = parseNull(t.json)
	case 't':
		v, b, err = parseTrue(t.json)
	case 'f':
		v, b, err = parseFalse(t.json)
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		v, b, err = parseNumber(t.json)
	case '{', '}', '[', ']', ':', ',':
		d, v, b = Delim(t.json[0]), t.json[:1], t.json[1:]
	default:
		v, b, err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
	}

	t.Delim = d
	t.Value = RawValue(v)
	t.Err = err
	t.Depth = t.depth()
	t.Index = t.index()
	t.IsKey = d == 0 && t.isKey
	t.json = b

	if d != 0 {
		switch d {
		case '{':
			t.isKey = true
			t.push(inObject)
		case '[':
			t.push(inArray)
		case '}':
			err = t.pop(inObject)
			t.Depth--
			t.Index = t.index()
		case ']':
			err = t.pop(inArray)
			t.Depth--
			t.Index = t.index()
		case ':':
			t.isKey = false
		case ',':
			if t.is(inObject) {
				t.isKey = true
			}
			// Guard against a comma appearing outside of any array or object,
			// which would otherwise panic on the empty stack.
			if len(t.stack) != 0 {
				t.stack[len(t.stack)-1].len++
			}
		}
	}

	if err != nil {
		// Propagate delimiter mismatch errors (from pop) to t.Err so that
		// callers inspecting t.Err after the loop observe the failure.
		t.Err = err
	}

	return (d != 0 || len(v) != 0) && err == nil
}

func (t *Tokenizer) push(typ scope) {
	if t.stack == nil {
		t.stack = t.buffer[:0]
	}
	t.stack = append(t.stack, state{typ: typ, len: 1})
}

func (t *Tokenizer) pop(expect scope) error {
	i := len(t.stack) - 1

	if i < 0 {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}

	if found := t.stack[i]; expect != found.typ {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}

	t.stack = t.stack[:i]
	return nil
}

func (t *Tokenizer) is(typ scope) bool {
	return len(t.stack) != 0 && t.stack[len(t.stack)-1].typ == typ
}

func (t *Tokenizer) depth() int {
	return len(t.stack)
}

func (t *Tokenizer) index() int {
	if len(t.stack) == 0 {
		return 0
	}
	return t.stack[len(t.stack)-1].len - 1
}

// RawValue represents a raw json value; it is intended to carry null, true,
// false, number, and string values only.
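//
// A minimal sketch of testing the kind of a value (the literal is
// hypothetical):
//
//	v := RawValue(`"hello"`)
//	fmt.Println(v.String(), v.Number()) // true false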
type RawValue []byte

// String returns true if v contains a string value.
func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' }

// Null returns true if v contains a null value.
func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' }

// True returns true if v contains a true value.
func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' }

// False returns true if v contains a false value.
func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' }

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	if len(v) != 0 {
		switch v[0] {
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return true
		}
	}
	return false
}

// AppendUnquote appends the unquoted version of the string value in v to b,
// returning the extended buffer.
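//
// A minimal sketch of reusing a single buffer across tokens to limit
// allocations (t and buf are hypothetical):
//
//	buf := make([]byte, 0, 64)
//	for t.Next() {
//		if t.Value.String() {
//			buf = t.Value.AppendUnquote(buf[:0])
//			// use buf before the next iteration overwrites it
//		}
//	}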
func (v RawValue) AppendUnquote(b []byte) []byte {
	s, r, new, err := parseStringUnquote([]byte(v), b)
	if err != nil {
		panic(err)
	}
	if len(r) != 0 {
		panic(syntaxError(r, "unexpected trailing tokens after json value"))
	}
	if new {
		b = s
	} else {
		b = append(b, s...)
	}
	return b
}

// Unquote returns the unquoted version of the string value in v.
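//
// A minimal sketch (the literal is hypothetical):
//
//	s := RawValue(`"caf\u00e9"`).Unquote()
//	fmt.Println(string(s)) // café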
func (v RawValue) Unquote() []byte {
	return v.AppendUnquote(nil)
}