github.com/segmentio/encoding@v0.4.0/json/token_test.go

package json

import (
	"bytes"
	"reflect"
	"testing"
)

type token struct {
	delim Delim
	value RawValue
	err   error
	depth int
	index int
	isKey bool
}

func delim(s string, depth, index int) token {
	return token{
		delim: Delim(s[0]),
		value: RawValue(s),
		depth: depth,
		index: index,
	}
}

func key(v string, depth, index int) token {
	return token{
		value: RawValue(v),
		depth: depth,
		index: index,
		isKey: true,
	}
}

func value(v string, depth, index int) token {
	return token{
		value: RawValue(v),
		depth: depth,
		index: index,
	}
}

func tokenize(t *testing.T, b []byte) (tokens []token) {
	tok := NewTokenizer(b)

	for tok.Next() {
		end := len(b) - tok.Remaining()
		start := end - len(tok.Value)
		if end > len(b) {
			t.Fatalf("token position too far [%d:%d], len(b) is %d", start, end, len(b))
		}
		if !bytes.Equal(b[start:end], tok.Value) {
			t.Fatalf("token position is wrong [%d:%d]", start, end)
		}

		tokens = append(tokens, token{
			delim: tok.Delim,
			value: tok.Value,
			err:   tok.Err,
			depth: tok.Depth,
			index: tok.Index,
			isKey: tok.IsKey,
		})
	}

	if tok.Err != nil {
		t.Fatal(tok.Err)
	}

	return
}

func TestTokenizer(t *testing.T) {
	tests := []struct {
		input  []byte
		tokens []token
	}{
		{
			input: []byte(`null`),
			tokens: []token{
				value(`null`, 0, 0),
			},
		},

		{
			input: []byte(`true`),
			tokens: []token{
				value(`true`, 0, 0),
			},
		},

		{
			input: []byte(`false`),
			tokens: []token{
				value(`false`, 0, 0),
			},
		},

		{
			input: []byte(`""`),
			tokens: []token{
				value(`""`, 0, 0),
			},
		},

		{
			input: []byte(`"Hello World!"`),
			tokens: []token{
				value(`"Hello World!"`, 0, 0),
			},
		},

		{
			input: []byte(`-0.1234`),
			tokens: []token{
				value(`-0.1234`, 0, 0),
			},
		},

		{
			input: []byte(` { } `),
			tokens: []token{
				delim(`{`, 0, 0),
				delim(`}`, 0, 0),
			},
		},

		{
			input: []byte(`{ "answer": 42 }`),
			tokens: []token{
				delim(`{`, 0, 0),
				key(`"answer"`, 1, 0),
				delim(`:`, 1, 0),
				value(`42`, 1, 0),
				delim(`}`, 0, 0),
			},
		},

		{
			input: []byte(`{ "sub": { "key-A": 1, "key-B": 2, "key-C": 3 } }`),
			tokens: []token{
				delim(`{`, 0, 0),
				key(`"sub"`, 1, 0),
				delim(`:`, 1, 0),
				delim(`{`, 1, 0),
				key(`"key-A"`, 2, 0),
				delim(`:`, 2, 0),
				value(`1`, 2, 0),
				delim(`,`, 2, 0),
				key(`"key-B"`, 2, 1),
				delim(`:`, 2, 1),
				value(`2`, 2, 1),
				delim(`,`, 2, 1),
				key(`"key-C"`, 2, 2),
				delim(`:`, 2, 2),
				value(`3`, 2, 2),
				delim(`}`, 1, 0),
				delim(`}`, 0, 0),
			},
		},

		{
			input: []byte(` [ ] `),
			tokens: []token{
				delim(`[`, 0, 0),
				delim(`]`, 0, 0),
			},
		},

		{
			input: []byte(`[1, 2, 3]`),
			tokens: []token{
				delim(`[`, 0, 0),
				value(`1`, 1, 0),
				delim(`,`, 1, 0),
				value(`2`, 1, 1),
				delim(`,`, 1, 1),
				value(`3`, 1, 2),
				delim(`]`, 0, 0),
			},
		},
	}

	for _, test := range tests {
		t.Run(string(test.input), func(t *testing.T) {
			tokens := tokenize(t, test.input)

			if !reflect.DeepEqual(tokens, test.tokens) {
				t.Error("tokens mismatch")
				t.Logf("expected: %+v", test.tokens)
				t.Logf("found: %+v", tokens)
			}
		})
	}
}

// Regression test for syntax that caused panics in Next.
func TestTokenizer_invalidInput(t *testing.T) {
	tests := []struct {
		scenario string
		payload  []byte
	}{
		{
			scenario: "bare comma",
			payload:  []byte(","),
		},
		{
			scenario: "comma after array",
			payload:  []byte("[],"),
		},
		{
			scenario: "comma after object",
			payload:  []byte("{},"),
		},
	}

	for _, test := range tests {
		t.Run(test.scenario, func(t *testing.T) {
			tkn := NewTokenizer(test.payload)

			// This shouldn't panic
			for tkn.Next() {
			}

			if tkn.Err == nil {
				t.Error("expected Err to be set, got nil")
			}
		})
	}
}

func BenchmarkTokenizer(b *testing.B) {
	values := []struct {
		scenario string
		payload  []byte
	}{
		{
			scenario: "null",
			payload:  []byte(`null`),
		},

		{
			scenario: "true",
			payload:  []byte(`true`),
		},

		{
			scenario: "false",
			payload:  []byte(`false`),
		},

		{
			scenario: "number",
			payload:  []byte(`-1.23456789`),
		},

		{
			scenario: "string",
			payload:  []byte(`"1234567890"`),
		},

		{
			scenario: "object",
			payload: []byte(`{
	"timestamp": "2019-01-09T18:59:57.456Z",
	"channel": "server",
	"type": "track",
	"event": "Test",
	"userId": "test-user-whatever",
	"messageId": "test-message-whatever",
	"integrations": {
		"whatever": {
			"debugMode": false
		},
		"myIntegration": {
			"debugMode": true
		}
	},
	"properties": {
		"trait1": 1,
		"trait2": "test",
		"trait3": true
	},
	"settings": {
		"apiKey": "1234567890",
		"debugMode": false,
		"directChannels": [
			"server",
			"client"
		],
		"endpoint": "https://somewhere.com/v1/integrations/segment"
	}
}`),
		},
	}

	benchmarks := []struct {
		scenario string
		function func(*testing.B, []byte)
	}{
		{
			scenario: "github.com/segmentio/encoding/json",
			function: func(b *testing.B, json []byte) {
				t := NewTokenizer(nil)

				for i := 0; i < b.N; i++ {
					t.Reset(json)

					for t.Next() {
						// Does nothing other than iterating over each token to measure the
						// CPU and memory footprint.
					}

					if t.Err != nil {
						b.Error(t.Err)
					}
				}
			},
		},
	}

	for _, benchmark := range benchmarks {
		b.Run(benchmark.scenario, func(b *testing.B) {
			for _, value := range values {
				b.Run(value.scenario, func(b *testing.B) {
					benchmark.function(b, value.payload)
					b.SetBytes(int64(len(value.payload)))
				})
			}
		})
	}
}
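
// The sketch below is an editorial addition, not part of the upstream test
// suite. It shows the consumption pattern the tests above exercise: walking a
// document with NewTokenizer/Next and filtering tokens on IsKey and Depth.
// The input literal and the expected key list are illustrative assumptions.
func TestTokenizerUsageSketch(t *testing.T) {
	tok := NewTokenizer([]byte(`{ "a": 1, "b": 2 }`))

	// Collect the keys of the top-level object; per TestTokenizer, members of
	// the outermost object are reported at Depth 1.
	var keys []string
	for tok.Next() {
		if tok.IsKey && tok.Depth == 1 {
			keys = append(keys, string(tok.Value))
		}
	}
	if tok.Err != nil {
		t.Fatal(tok.Err)
	}

	// Keys are reported as raw JSON strings, quotes included.
	if !reflect.DeepEqual(keys, []string{`"a"`, `"b"`}) {
		t.Errorf("unexpected keys: %v", keys)
	}
}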