package json

import (
	"strconv"
	"sync"
	"unsafe"
)

// Tokenizer is an iterator-style type which can be used to progressively parse
// through a json input.
//
// Tokenizing json is useful to build highly efficient parsing operations, for
// example when doing transformations on-the-fly as the program reads the
// input and produces the transformed json to an output buffer.
//
// Here is a common pattern to use a tokenizer:
//
//	for t := json.NewTokenizer(b); t.Next(); {
//		switch k := t.Kind(); k.Class() {
//		case json.Null:
//			...
//		case json.Bool:
//			...
//		case json.Num:
//			...
//		case json.String:
//			...
//		case json.Array:
//			...
//		case json.Object:
//			...
//		}
//	}
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys.
	stack *stack

	// Decoder used for parsing.
	decoder
}

// NewTokenizer constructs a new Tokenizer which reads its json input from b.
func NewTokenizer(b []byte) *Tokenizer {
	return &Tokenizer{
		json:    b,
		decoder: decoder{flags: internalParseFlags(b)},
	}
}

// Reset erases the state of t and re-initializes it with the json input from b.
func (t *Tokenizer) Reset(b []byte) {
	if t.stack != nil {
		// Return the stack to the pool so the next tokenizer can reuse it.
		releaseStack(t.stack)
	}
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	t.stack = nil
	t.decoder = decoder{flags: internalParseFlags(b)}
}

// Next advances the tokenizer to the next token of the json input, returning
// true on success, or false when the end of the input has been reached.
//
// If the tokenizer encounters malformed json while reading the input the method
// sets t.Err to an error describing the issue, and returns false. Once an error
// has been encountered, the tokenizer will always fail until its input is
// cleared by a call to its Reset method.
func (t *Tokenizer) Next() bool {
	if t.Err != nil {
		return false
	}

	// Inlined code of the skipSpaces function, this gives a ~15% speed boost.
	i := 0
skipLoop:
	for _, c := range t.json {
		switch c {
		case sp, ht, nl, cr:
			i++
		default:
			break skipLoop
		}
	}

	if i > 0 {
		t.json = t.json[i:]
	}

	if len(t.json) == 0 {
		// End of input: Reset also releases the stack back to the pool.
		t.Reset(nil)
		return false
	}

	// Dispatch on the first byte of the next token; each parse* helper returns
	// the token bytes, the remaining input, the token kind, and any error.
	var kind Kind
	switch t.json[0] {
	case '"':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseString(t.json)
	case 'n':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseNull(t.json)
	case 't':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseTrue(t.json)
	case 'f':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseFalse(t.json)
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		t.Delim = 0
		t.Value, t.json, kind, t.Err = t.parseNumber(t.json)
	case '{', '}', '[', ']', ':', ',':
		t.Delim, t.Value, t.json = Delim(t.json[0]), t.json[:1], t.json[1:]
		switch t.Delim {
		case '{':
			kind = Object
		case '[':
			kind = Array
		}
	default:
		t.Delim = 0
		t.Value, t.json, t.Err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
	}

	t.Depth = t.depth()
	t.Index = t.index()
	t.flags = t.flags.withKind(kind)

	if t.Delim == 0 {
		t.IsKey = t.isKey
	} else {
		t.IsKey = false

		// Track entering/leaving arrays and objects, and whether the next
		// value is an object key.
		switch t.Delim {
		case '{':
			t.isKey = true
			t.push(inObject)
		case '[':
			t.push(inArray)
		case '}':
			t.Err = t.pop(inObject)
			t.Depth--
			t.Index = t.index()
		case ']':
			t.Err = t.pop(inArray)
			t.Depth--
			t.Index = t.index()
		case ':':
			t.isKey = false
		case ',':
			if t.stack == nil || len(t.stack.state) == 0 {
				t.Err = syntaxError(t.json, "found unexpected comma")
				return false
			}
			if t.stack.is(inObject) {
				t.isKey = true
			}
			// A comma introduces the next element of the current scope.
			t.stack.state[len(t.stack.state)-1].len++
		}
	}

	return (t.Delim != 0 || len(t.Value) != 0) && t.Err == nil
}

// depth returns the current nesting depth, or zero when no array or object has
// been entered.
func (t *Tokenizer) depth() int {
	if t.stack == nil {
		return 0
	}
	return t.stack.depth()
}

// index returns the position of the current value within the innermost array
// or object, or zero when outside of any.
func (t *Tokenizer) index() int {
	if t.stack == nil {
		return 0
	}
	return t.stack.index()
}

// push records entering an array or object scope, lazily acquiring a stack
// from the pool on first use.
func (t *Tokenizer) push(typ scope) {
	if t.stack == nil {
		t.stack = acquireStack()
	}
	t.stack.push(typ)
}

// pop records leaving an array or object scope, returning a syntax error if
// the closing delimiter does not match the scope that was entered.
func (t *Tokenizer) pop(expect scope) error {
	if t.stack == nil || !t.stack.pop(expect) {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}
	return nil
}

// Kind returns the kind of the value that the tokenizer is currently positioned
// on.
func (t *Tokenizer) Kind() Kind { return t.flags.kind() }

// Bool returns a bool containing the value of the json boolean that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a boolean, the behavior is undefined.
func (t *Tokenizer) Bool() bool { return t.flags.kind() == True }

// Int returns an int64 containing the value of the json number that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on an integer, the behavior is undefined.
func (t *Tokenizer) Int() int64 {
	i, _, _ := t.parseInt(t.Value, int64Type)
	return i
}

// Uint returns a uint64 containing the value of the json number that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a positive integer, the behavior is
// undefined.
func (t *Tokenizer) Uint() uint64 {
	u, _, _ := t.parseUint(t.Value, uint64Type)
	return u
}

// Float returns a float64 containing the value of the json number that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// If the tokenizer is not positioned on a number, the behavior is undefined.
func (t *Tokenizer) Float() float64 {
	// Zero-copy []byte-to-string conversion to avoid allocating for the
	// strconv.ParseFloat call.
	f, _ := strconv.ParseFloat(*(*string)(unsafe.Pointer(&t.Value)), 64)
	return f
}

// String returns a byte slice containing the value of the json string that the
// tokenizer is currently pointing at.
//
// This method must only be called after checking the kind of the token via a
// call to Kind.
//
// When possible, the returned byte slice references the backing array of the
// tokenizer. A new slice is only allocated if the tokenizer needed to unescape
// the json string.
//
// If the tokenizer is not positioned on a string, the behavior is undefined.
func (t *Tokenizer) String() []byte {
	if t.flags.kind() == Unescaped && len(t.Value) > 1 {
		return t.Value[1 : len(t.Value)-1] // unquote
	}
	s, _, _, _ := t.parseStringUnquote(t.Value, nil)
	return s
}

// Remaining returns the number of bytes left to parse.
func (t *Tokenizer) Remaining() int {
	return len(t.json)
}

// RawValue represents a raw json value, it is intended to carry null, true,
// false, number, and string values only.
type RawValue []byte

// String returns true if v contains a string value.
func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' }

// Null returns true if v contains a null value.
func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' }

// True returns true if v contains a true value.
func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' }

// False returns true if v contains a false value.
func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' }

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	if len(v) != 0 {
		switch v[0] {
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return true
		}
	}
	return false
}

// AppendUnquote writes the unquoted version of the string value in v into b.
//
// The method panics if v is not a valid, complete json string.
func (v RawValue) AppendUnquote(b []byte) []byte {
	d := decoder{}
	s, r, _, err := d.parseStringUnquote(v, b)
	if err != nil {
		panic(err)
	}
	if len(r) != 0 {
		panic(syntaxError(r, "unexpected trailing tokens after json value"))
	}
	return append(b, s...)
}

// Unquote returns the unquoted version of the string value in v.
func (v RawValue) Unquote() []byte {
	return v.AppendUnquote(nil)
}

// scope identifies whether the tokenizer is inside an array or an object.
type scope int

const (
	inArray scope = iota
	inObject
)

// state is one stack entry: the scope type and the number of elements seen so
// far in that scope.
type state struct {
	typ scope
	len int
}

// stack tracks the nesting of arrays and objects while tokenizing.
type stack struct {
	state []state
}

// push enters a new array or object scope.
func (s *stack) push(typ scope) {
	s.state = append(s.state, state{typ: typ, len: 1})
}

// pop leaves the innermost scope, returning false if the stack is empty or the
// innermost scope is not of the expected type.
func (s *stack) pop(expect scope) bool {
	i := len(s.state) - 1

	if i < 0 {
		return false
	}

	if found := s.state[i]; expect != found.typ {
		return false
	}

	s.state = s.state[:i]
	return true
}

// is reports whether the innermost scope is of the given type.
func (s *stack) is(typ scope) bool {
	return len(s.state) != 0 && s.state[len(s.state)-1].typ == typ
}

// depth returns the number of nested scopes.
func (s *stack) depth() int {
	return len(s.state)
}

// index returns the zero-based position within the innermost scope.
func (s *stack) index() int {
	if len(s.state) == 0 {
		return 0
	}
	return s.state[len(s.state)-1].len - 1
}

// acquireStack fetches a stack from the pool, or allocates a fresh one if the
// pool is empty; recycled stacks are truncated but keep their capacity.
func acquireStack() *stack {
	s, _ := stackPool.Get().(*stack)
	if s == nil {
		s = &stack{state: make([]state, 0, 4)}
	} else {
		s.state = s.state[:0]
	}
	return s
}

// releaseStack returns a stack to the pool for reuse.
func releaseStack(s *stack) {
	stackPool.Put(s)
}

var (
	stackPool sync.Pool // *stack
)