github.com/neilotoole/jsoncolor@v0.7.2-0.20231115150201-1637fae69be1/token.go (about) 1 package jsoncolor 2 3 // Tokenizer is an iterator-style type which can be used to progressively parse 4 // through a json input. 5 // 6 // Tokenizing json is useful to build highly efficient parsing operations, for 7 // example when doing tranformations on-the-fly where as the program reads the 8 // input and produces the transformed json to an output buffer. 9 // 10 // Here is a common pattern to use a tokenizer: 11 // 12 // for t := json.NewTokenizer(b); t.Next(); { 13 // switch t.Delim { 14 // case '{': 15 // ... 16 // case '}': 17 // ... 18 // case '[': 19 // ... 20 // case ']': 21 // ... 22 // case ':': 23 // ... 24 // case ',': 25 // ... 26 // } 27 // 28 // switch { 29 // case t.Value.String(): 30 // ... 31 // case t.Value.Null(): 32 // ... 33 // case t.Value.True(): 34 // ... 35 // case t.Value.False(): 36 // ... 37 // case t.Value.Number(): 38 // ... 39 // } 40 // } 41 type Tokenizer struct { 42 // When the tokenizer is positioned on a json delimiter this field is not 43 // zero. In this case the possible values are '{', '}', '[', ']', ':', and 44 // ','. 45 Delim Delim 46 47 // This field contains the raw json token that the tokenizer is pointing at. 48 // When Delim is not zero, this field is a single-element byte slice 49 // continaing the delimiter value. Otherwise, this field holds values like 50 // null, true, false, numbers, or quoted strings. 51 Value RawValue 52 53 // When the tokenizer has encountered invalid content this field is not nil. 54 Err error 55 56 // When the value is in an array or an object, this field contains the depth 57 // at which it was found. 58 Depth int 59 60 // When the value is in an array or an object, this field contains the 61 // position at which it was found. 62 Index int 63 64 // This field is true when the value is the key of an object. 65 IsKey bool 66 67 // Tells whether the next value read from the tokenizer is a key. 68 isKey bool 69 70 // json input for the tokenizer, pointing at data right after the last token 71 // that was parsed. 72 json []byte 73 74 // Stack used to track entering and leaving arrays, objects, and keys. The 75 // buffer is used as a AppendPre-allocated space to 76 stack []state 77 buffer [8]state 78 } 79 80 type state struct { 81 typ scope 82 len int 83 } 84 85 type scope int 86 87 const ( 88 inArray scope = iota 89 inObject 90 ) 91 92 // NewTokenizer constructs a new Tokenizer which reads its json input from b. 93 func NewTokenizer(b []byte) *Tokenizer { return &Tokenizer{json: b} } 94 95 // Reset erases the state of t and re-initializes it with the json input from b. 96 func (t *Tokenizer) Reset(b []byte) { 97 // This code is similar to: 98 // 99 // *t = Tokenizer{json: b} 100 // 101 // However, it does not compile down to an invocation of duff-copy, which 102 // ends up being slower and prevents the code from being inlined. 103 t.Delim = 0 104 t.Value = nil 105 t.Err = nil 106 t.Depth = 0 107 t.Index = 0 108 t.IsKey = false 109 t.isKey = false 110 t.json = b 111 t.stack = nil 112 } 113 114 // Next returns a new tokenizer pointing at the next token, or the zero-value of 115 // Tokenizer if the end of the json input has been reached. 116 // 117 // If the tokenizer encounters malformed json while reading the input the method 118 // sets t.Err to an error describing the issue, and returns false. Once an error 119 // has been encountered, the tokenizer will always fail until its input is 120 // cleared by a call to its Reset method. 121 func (t *Tokenizer) Next() bool { 122 if t.Err != nil { 123 return false 124 } 125 126 // Inlined code of the skipSpaces function, this give a ~15% speed boost. 127 i := 0 128 skipLoop: 129 for _, c := range t.json { 130 switch c { 131 case sp, ht, nl, cr: 132 i++ 133 default: 134 break skipLoop 135 } 136 } 137 138 if t.json = t.json[i:]; len(t.json) == 0 { 139 t.Reset(nil) 140 return false 141 } 142 143 var d Delim 144 var v []byte 145 var b []byte 146 var err error 147 148 switch t.json[0] { 149 case '"': 150 v, b, err = parseString(t.json) 151 case 'n': 152 v, b, err = parseNull(t.json) 153 case 't': 154 v, b, err = parseTrue(t.json) 155 case 'f': 156 v, b, err = parseFalse(t.json) 157 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 158 v, b, err = parseNumber(t.json) 159 case '{', '}', '[', ']', ':', ',': 160 d, v, b = Delim(t.json[0]), t.json[:1], t.json[1:] 161 default: 162 v, b, err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0]) 163 } 164 165 t.Delim = d 166 t.Value = RawValue(v) 167 t.Err = err 168 t.Depth = t.depth() 169 t.Index = t.index() 170 t.IsKey = d == 0 && t.isKey 171 t.json = b 172 173 if d != 0 { 174 switch d { 175 case '{': 176 t.isKey = true 177 t.push(inObject) 178 case '[': 179 t.push(inArray) 180 case '}': 181 err = t.pop(inObject) 182 t.Depth-- 183 t.Index = t.index() 184 case ']': 185 err = t.pop(inArray) 186 t.Depth-- 187 t.Index = t.index() 188 case ':': 189 t.isKey = false 190 case ',': 191 if t.is(inObject) { 192 t.isKey = true 193 } 194 t.stack[len(t.stack)-1].len++ 195 } 196 } 197 198 return (d != 0 || len(v) != 0) && err == nil 199 } 200 201 func (t *Tokenizer) push(typ scope) { 202 if t.stack == nil { 203 t.stack = t.buffer[:0] 204 } 205 t.stack = append(t.stack, state{typ: typ, len: 1}) 206 } 207 208 func (t *Tokenizer) pop(expect scope) error { 209 i := len(t.stack) - 1 210 211 if i < 0 { 212 return syntaxError(t.json, "found unexpected character while tokenizing json input") 213 } 214 215 if found := t.stack[i]; expect != found.typ { 216 return syntaxError(t.json, "found unexpected character while tokenizing json input") 217 } 218 219 t.stack = t.stack[:i] 220 return nil 221 } 222 223 func (t *Tokenizer) is(typ scope) bool { 224 return len(t.stack) != 0 && t.stack[len(t.stack)-1].typ == typ 225 } 226 227 func (t *Tokenizer) depth() int { 228 return len(t.stack) 229 } 230 231 func (t *Tokenizer) index() int { 232 if len(t.stack) == 0 { 233 return 0 234 } 235 return t.stack[len(t.stack)-1].len - 1 236 } 237 238 // RawValue represents a raw json value, it is intended to carry null, true, 239 // false, number, and string values only. 240 type RawValue []byte 241 242 // String returns true if v contains a string value. 243 func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' } 244 245 // Null returns true if v contains a null value. 246 func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' } 247 248 // True returns true if v contains a true value. 249 func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' } 250 251 // False returns true if v contains a false value. 252 func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' } 253 254 // Number returns true if v contains a number value. 255 func (v RawValue) Number() bool { 256 if len(v) != 0 { 257 switch v[0] { 258 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 259 return true 260 } 261 } 262 return false 263 } 264 265 // AppendUnquote writes the unquoted version of the string value in v into b. 266 func (v RawValue) AppendUnquote(b []byte) []byte { 267 s, r, isNew, err := parseStringUnquote([]byte(v), b) 268 if err != nil { 269 panic(err) 270 } 271 if len(r) != 0 { 272 panic(syntaxError(r, "unexpected trailing tokens after json value")) 273 } 274 if isNew { 275 b = s 276 } else { 277 b = append(b, s...) 278 } 279 return b 280 } 281 282 // Unquote returns the unquoted version of the string value in v. 283 func (v RawValue) Unquote() []byte { 284 return v.AppendUnquote(nil) 285 }