github.com/neilotoole/jsoncolor@v0.6.0/token.go (about) 1 package jsoncolor 2 3 // Tokenizer is an iterator-style type which can be used to progressively parse 4 // through a json input. 5 // 6 // Tokenizing json is useful to build highly efficient parsing operations, for 7 // example when doing tranformations on-the-fly where as the program reads the 8 // input and produces the transformed json to an output buffer. 9 // 10 // Here is a common pattern to use a tokenizer: 11 // 12 // for t := json.NewTokenizer(b); t.Next(); { 13 // switch t.Delim { 14 // case '{': 15 // ... 16 // case '}': 17 // ... 18 // case '[': 19 // ... 20 // case ']': 21 // ... 22 // case ':': 23 // ... 24 // case ',': 25 // ... 26 // } 27 // 28 // switch { 29 // case t.Value.String(): 30 // ... 31 // case t.Value.Null(): 32 // ... 33 // case t.Value.True(): 34 // ... 35 // case t.Value.False(): 36 // ... 37 // case t.Value.Number(): 38 // ... 39 // } 40 // } 41 // 42 type Tokenizer struct { 43 // When the tokenizer is positioned on a json delimiter this field is not 44 // zero. In this case the possible values are '{', '}', '[', ']', ':', and 45 // ','. 46 Delim Delim 47 48 // This field contains the raw json token that the tokenizer is pointing at. 49 // When Delim is not zero, this field is a single-element byte slice 50 // continaing the delimiter value. Otherwise, this field holds values like 51 // null, true, false, numbers, or quoted strings. 52 Value RawValue 53 54 // When the tokenizer has encountered invalid content this field is not nil. 55 Err error 56 57 // When the value is in an array or an object, this field contains the depth 58 // at which it was found. 59 Depth int 60 61 // When the value is in an array or an object, this field contains the 62 // position at which it was found. 63 Index int 64 65 // This field is true when the value is the key of an object. 66 IsKey bool 67 68 // Tells whether the next value read from the tokenizer is a key. 69 isKey bool 70 71 // json input for the tokenizer, pointing at data right after the last token 72 // that was parsed. 73 json []byte 74 75 // Stack used to track entering and leaving arrays, objects, and keys. The 76 // buffer is used as a AppendPre-allocated space to 77 stack []state 78 buffer [8]state 79 } 80 81 type state struct { 82 typ scope 83 len int 84 } 85 86 type scope int 87 88 const ( 89 inArray scope = iota 90 inObject 91 ) 92 93 // NewTokenizer constructs a new Tokenizer which reads its json input from b. 94 func NewTokenizer(b []byte) *Tokenizer { return &Tokenizer{json: b} } 95 96 // Reset erases the state of t and re-initializes it with the json input from b. 97 func (t *Tokenizer) Reset(b []byte) { 98 // This code is similar to: 99 // 100 // *t = Tokenizer{json: b} 101 // 102 // However, it does not compile down to an invocation of duff-copy, which 103 // ends up being slower and prevents the code from being inlined. 104 t.Delim = 0 105 t.Value = nil 106 t.Err = nil 107 t.Depth = 0 108 t.Index = 0 109 t.IsKey = false 110 t.isKey = false 111 t.json = b 112 t.stack = nil 113 } 114 115 // Next returns a new tokenizer pointing at the next token, or the zero-value of 116 // Tokenizer if the end of the json input has been reached. 117 // 118 // If the tokenizer encounters malformed json while reading the input the method 119 // sets t.Err to an error describing the issue, and returns false. Once an error 120 // has been encountered, the tokenizer will always fail until its input is 121 // cleared by a call to its Reset method. 122 func (t *Tokenizer) Next() bool { 123 if t.Err != nil { 124 return false 125 } 126 127 // Inlined code of the skipSpaces function, this give a ~15% speed boost. 128 i := 0 129 skipLoop: 130 for _, c := range t.json { 131 switch c { 132 case sp, ht, nl, cr: 133 i++ 134 default: 135 break skipLoop 136 } 137 } 138 139 if t.json = t.json[i:]; len(t.json) == 0 { 140 t.Reset(nil) 141 return false 142 } 143 144 var d Delim 145 var v []byte 146 var b []byte 147 var err error 148 149 switch t.json[0] { 150 case '"': 151 v, b, err = parseString(t.json) 152 case 'n': 153 v, b, err = parseNull(t.json) 154 case 't': 155 v, b, err = parseTrue(t.json) 156 case 'f': 157 v, b, err = parseFalse(t.json) 158 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 159 v, b, err = parseNumber(t.json) 160 case '{', '}', '[', ']', ':', ',': 161 d, v, b = Delim(t.json[0]), t.json[:1], t.json[1:] 162 default: 163 v, b, err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0]) 164 } 165 166 t.Delim = d 167 t.Value = RawValue(v) 168 t.Err = err 169 t.Depth = t.depth() 170 t.Index = t.index() 171 t.IsKey = d == 0 && t.isKey 172 t.json = b 173 174 if d != 0 { 175 switch d { 176 case '{': 177 t.isKey = true 178 t.push(inObject) 179 case '[': 180 t.push(inArray) 181 case '}': 182 err = t.pop(inObject) 183 t.Depth-- 184 t.Index = t.index() 185 case ']': 186 err = t.pop(inArray) 187 t.Depth-- 188 t.Index = t.index() 189 case ':': 190 t.isKey = false 191 case ',': 192 if t.is(inObject) { 193 t.isKey = true 194 } 195 t.stack[len(t.stack)-1].len++ 196 } 197 } 198 199 return (d != 0 || len(v) != 0) && err == nil 200 } 201 202 func (t *Tokenizer) push(typ scope) { 203 if t.stack == nil { 204 t.stack = t.buffer[:0] 205 } 206 t.stack = append(t.stack, state{typ: typ, len: 1}) 207 } 208 209 func (t *Tokenizer) pop(expect scope) error { 210 i := len(t.stack) - 1 211 212 if i < 0 { 213 return syntaxError(t.json, "found unexpected character while tokenizing json input") 214 } 215 216 if found := t.stack[i]; expect != found.typ { 217 return syntaxError(t.json, "found unexpected character while tokenizing json input") 218 } 219 220 t.stack = t.stack[:i] 221 return nil 222 } 223 224 func (t *Tokenizer) is(typ scope) bool { 225 return len(t.stack) != 0 && t.stack[len(t.stack)-1].typ == typ 226 } 227 228 func (t *Tokenizer) depth() int { 229 return len(t.stack) 230 } 231 232 func (t *Tokenizer) index() int { 233 if len(t.stack) == 0 { 234 return 0 235 } 236 return t.stack[len(t.stack)-1].len - 1 237 } 238 239 // RawValue represents a raw json value, it is intended to carry null, true, 240 // false, number, and string values only. 241 type RawValue []byte 242 243 // String returns true if v contains a string value. 244 func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' } 245 246 // Null returns true if v contains a null value. 247 func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' } 248 249 // True returns true if v contains a true value. 250 func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' } 251 252 // False returns true if v contains a false value. 253 func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' } 254 255 // Number returns true if v contains a number value. 256 func (v RawValue) Number() bool { 257 if len(v) != 0 { 258 switch v[0] { 259 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 260 return true 261 } 262 } 263 return false 264 } 265 266 // AppendUnquote writes the unquoted version of the string value in v into b. 267 func (v RawValue) AppendUnquote(b []byte) []byte { 268 s, r, new, err := parseStringUnquote([]byte(v), b) 269 if err != nil { 270 panic(err) 271 } 272 if len(r) != 0 { 273 panic(syntaxError(r, "unexpected trailing tokens after json value")) 274 } 275 if new { 276 b = s 277 } else { 278 b = append(b, s...) 279 } 280 return b 281 } 282 283 // Unquote returns the unquoted version of the string value in v. 284 func (v RawValue) Unquote() []byte { 285 return v.AppendUnquote(nil) 286 }