github.com/dgraph-io/simdjson-go@v0.3.0/ndjson_test.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package simdjson 18 19 import ( 20 "fmt" 21 "io/ioutil" 22 "log" 23 "net/http" 24 "os" 25 "path/filepath" 26 "strings" 27 "testing" 28 29 "github.com/klauspost/compress/zstd" 30 ) 31 32 const demo_ndjson = `{"Image":{"Width":800,"Height":600,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}} 33 {"Image":{"Width":801,"Height":601,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}} 34 {"Image":{"Width":802,"Height":602,"Title":"View from 15th Floor","Thumbnail":{"Url":"http://www.example.com/image/481989943","Height":125,"Width":100},"Animated":false,"IDs":[116,943,234,38793]}}` 35 36 func verifyDemoNdjson(pj internalParsedJson, t *testing.T, object int) { 37 38 const nul = '\000' 39 40 testCases := []struct { 41 expected []struct { 42 c byte 43 val uint64 44 } 45 }{ 46 { 47 []struct { 48 c byte 49 val uint64 50 }{ 51 // First object 52 {'r', 0x33}, 53 {'{', 0x32}, 54 {'"', 0x2}, 55 {nul, 0x5}, 56 {'{', 0x31}, 57 {'"', 0xb}, 58 {nul, 0x5}, 59 {'l', 0x0}, 60 {nul, 0x320}, 61 {'"', 0x17}, 62 {nul, 0x6}, 63 {'l', 0x0}, 64 {nul, 0x258}, 65 {'"', 0x24}, 66 {nul, 0x5}, 67 {'"', 0x2c}, 68 {nul, 0x14}, 69 {'"', 0x43}, 70 {nul, 0x9}, 71 {'{', 0x21}, 72 {'"', 0x50}, 73 {nul, 0x3}, 74 {'"', 0x56}, 75 {nul, 0x26}, 76 {'"', 0x7f}, 77 {nul, 0x6}, 78 {'l', 0x0}, 79 {nul, 0x7d}, 80 {'"', 0x8c}, 81 {nul, 0x5}, 82 {'l', 0x0}, 83 {nul, 0x64}, 84 {'}', 0x13}, 85 {'"', 0x99}, 86 {nul, 0x8}, 87 {'f', 0x0}, 88 {'"', 0xaa}, 89 {nul, 0x3}, 90 {'[', 0x30}, 91 {'l', 0x0}, 92 {nul, 0x74}, 93 {'l', 0x0}, 94 {nul, 0x3af}, 95 {'l', 0x0}, 96 {nul, 0xea}, 97 {'l', 0x0}, 98 {nul, 0x9789}, 99 {']', 0x26}, 100 {'}', 0x4}, 101 {'}', 0x1}, 102 {'r', 0x0}, 103 // 104 // Second object 105 {'r', 0x66}, 106 {'{', 0x65}, 107 {'"', 0xc7}, 108 {nul, 0x5}, 109 {'{', 0x64}, 110 {'"', 0xd0}, 111 {nul, 0x5}, 112 {'l', 0x0}, 113 {nul, 0x321}, 114 {'"', 0xdc}, 115 {nul, 0x6}, 116 {'l', 0x0}, 117 {nul, 0x259}, 118 {'"', 0xe9}, 119 {nul, 0x5}, 120 {'"', 0xf1}, 121 {nul, 0x14}, 122 {'"', 0x108}, 123 {nul, 0x9}, 124 {'{', 0x54}, 125 {'"', 0x115}, 126 {nul, 0x3}, 127 {'"', 0x11b}, 128 {nul, 0x26}, 129 {'"', 0x144}, 130 {nul, 0x6}, 131 {'l', 0x0}, 132 {nul, 0x7d}, 133 {'"', 0x151}, 134 {nul, 0x5}, 135 {'l', 0x0}, 136 {nul, 0x64}, 137 {'}', 0x46}, 138 {'"', 0x15e}, 139 {nul, 0x8}, 140 {'f', 0x0}, 141 {'"', 0x16f}, 142 {nul, 0x3}, 143 {'[', 0x63}, 144 {'l', 0x0}, 145 {nul, 0x74}, 146 {'l', 0x0}, 147 {nul, 0x3af}, 148 {'l', 0x0}, 149 {nul, 0xea}, 150 {'l', 0x0}, 151 {nul, 0x9789}, 152 {']', 0x59}, 153 {'}', 0x37}, 154 {'}', 0x34}, 155 {'r', 0x33}, 156 // 157 // Third object 158 {'r', 0x99}, 159 {'{', 0x98}, 160 {'"', 0x18c}, 161 {nul, 0x5}, 162 {'{', 0x97}, 163 {'"', 0x195}, 164 {nul, 0x5}, 165 {'l', 0x0}, 166 {nul, 0x322}, 167 {'"', 0x1a1}, 168 {nul, 0x6}, 169 {'l', 0x0}, 170 {nul, 0x25a}, 171 {'"', 0x1ae}, 172 {nul, 0x5}, 173 {'"', 0x1b6}, 174 {nul, 0x14}, 175 {'"', 0x1cd}, 176 {nul, 0x9}, 177 {'{', 0x87}, 178 {'"', 0x1da}, 179 {nul, 0x3}, 180 {'"', 0x1e0}, 181 {nul, 0x26}, 182 {'"', 0x209}, 183 {nul, 0x6}, 184 {'l', 0x0}, 185 {nul, 0x7d}, 186 {'"', 0x216}, 187 {nul, 0x5}, 188 {'l', 0x0}, 189 {nul, 0x64}, 190 {'}', 0x79}, 191 {'"', 0x223}, 192 {nul, 0x8}, 193 {'f', 0x0}, 194 {'"', 0x234}, 195 {nul, 0x3}, 196 {'[', 0x96}, 197 {'l', 0x0}, 198 {nul, 0x74}, 199 {'l', 0x0}, 200 {nul, 0x3af}, 201 {'l', 0x0}, 202 {nul, 0xea}, 203 {'l', 0x0}, 204 {nul, 0x9789}, 205 {']', 0x8c}, 206 {'}', 0x6a}, 207 {'}', 0x67}, 208 {'r', 0x66}, 209 }, 210 }, 211 } 212 213 tc := testCases[0] 214 215 // For TestFindNewlineDelimiters, adjust the array that we are testing against 216 if object == 1 { 217 tc.expected = tc.expected[:51] 218 } else if object == 2 || object == 3 { 219 tc.expected = tc.expected[:51] 220 221 adjustQoutes := []uint64{2, 5, 9, 13, 15, 17, 20, 22, 24, 28, 33, 36} 222 for _, a := range adjustQoutes { 223 tc.expected[a].val += 1 224 } 225 if object == 2 { 226 tc.expected[8].val = 801 227 tc.expected[12].val = 601 228 } else if object == 3 { 229 tc.expected[8].val = 802 230 tc.expected[12].val = 602 231 } 232 } 233 234 if len(pj.Tape) != len(tc.expected) { 235 t.Errorf("verifyDemoNdjson: got: %d want: %d", len(pj.Tape), len(tc.expected)) 236 } 237 for ii, tp := range pj.Tape { 238 //c := "'" + string(byte(tp >> 56)) + "'" 239 //if byte(tp >> 56) == 0 { 240 // c = "nul" 241 //} 242 //fmt.Printf("{%s, 0x%x},\n", c, tp&0xffffffffffffff) 243 expected := tc.expected[ii].val | (uint64(tc.expected[ii].c) << 56) 244 if !alwaysCopyStrings && tp != expected { 245 t.Errorf("verifyDemoNdjson(%d): got: %016x want: %016x", ii, tp, expected) 246 } 247 } 248 } 249 250 func TestNdjsonCountWhere(t *testing.T) { 251 if !SupportedCPU() { 252 t.SkipNow() 253 } 254 if testing.Short() { 255 t.Skip("skipping... too long") 256 } 257 ndjson := loadFile("testdata/parking-citations-1M.json.zst") 258 pj, err := ParseND(ndjson, nil) 259 if err != nil { 260 t.Fatal(err) 261 } 262 263 const want = 110349 264 if result := countWhere("Make", "HOND", *pj); result != want { 265 t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want) 266 } 267 } 268 269 func TestNdjsonCountWhere2(t *testing.T) { 270 if !SupportedCPU() { 271 t.SkipNow() 272 } 273 if testing.Short() { 274 t.Skip("skipping... too long") 275 } 276 ndjson := loadFile("testdata/RC_2009-01.json.zst") 277 // Test trimming 278 b := make([]byte, 0, len(ndjson)+4) 279 b = append(b, '\n', '\n') 280 b = append(b, ndjson...) 281 b = append(b, '\n', '\n') 282 pj, err := ParseND(ndjson, nil) 283 if err != nil { 284 t.Fatal(err) 285 } 286 const want = 170315 287 if result := countWhere("subreddit", "reddit.com", *pj); result != want { 288 t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want) 289 } 290 } 291 292 func loadFile(filename string) []byte { 293 if !strings.HasSuffix(filename, ".zst") { 294 ndjson, err := ioutil.ReadFile(filename) 295 if err != nil { 296 panic("Failed to load file") 297 } 298 return ndjson 299 } 300 var f *os.File 301 var err error 302 for { 303 f, err = os.Open(filename) 304 if err == nil { 305 defer f.Close() 306 break 307 } 308 if os.IsNotExist(err) { 309 fmt.Println("downloading file" + filename) 310 resp, err := http.DefaultClient.Get("https://files.klauspost.com/compress/" + filepath.Base(filename)) 311 if err == nil && resp.StatusCode == http.StatusOK { 312 b, err := ioutil.ReadAll(resp.Body) 313 if err == nil { 314 err = ioutil.WriteFile(filename, b, os.ModePerm) 315 if err == nil { 316 continue 317 } 318 } 319 } 320 } 321 panic("Failed to (down)load file:" + err.Error()) 322 } 323 dec, err := zstd.NewReader(f) 324 if err != nil { 325 panic("Failed to create decompressor") 326 } 327 defer dec.Close() 328 ndjson, err := ioutil.ReadAll(dec) 329 if err != nil { 330 panic("Failed to load file") 331 } 332 return ndjson 333 } 334 335 func count_raw_tape(tape []uint64) (count int) { 336 337 for tapeidx := uint64(0); tapeidx < uint64(len(tape)); count++ { 338 tape_val := tape[tapeidx] 339 tapeidx = tape_val & JSONVALUEMASK 340 } 341 342 return 343 } 344 345 func countWhere(key, value string, data ParsedJson) (count int) { 346 tmpi := data.Iter() 347 stack := []*Iter{&tmpi} 348 var obj *Object 349 var tmp *Iter 350 var elem Element 351 352 for len(stack) > 0 { 353 iter := stack[len(stack)-1] 354 typ := iter.Advance() 355 356 typeswitch: 357 switch typ { 358 case TypeNone: 359 if len(stack) == 0 { 360 return 361 } 362 stack = stack[:len(stack)-1] 363 case TypeRoot: 364 var err error 365 typ, tmp, err = iter.Root(tmp) 366 if err != nil { 367 log.Fatal(err) 368 } 369 switch typ { 370 case TypeNone: 371 break typeswitch 372 case TypeObject: 373 default: 374 log.Fatalf("expected object inside root, got %v", typ) 375 } 376 if len(stack) > 2 { 377 break 378 } 379 obj, err = tmp.Object(obj) 380 if err != nil { 381 log.Fatal(err) 382 } 383 e := obj.FindKey(key, &elem) 384 if e != nil && elem.Type == TypeString { 385 v, _ := elem.Iter.StringBytes() 386 if string(v) == value { 387 count++ 388 } 389 } 390 default: 391 } 392 } 393 394 return 395 } 396 397 func countObjects(data ParsedJson) (count int) { 398 iter := data.Iter() 399 for { 400 typ := iter.Advance() 401 switch typ { 402 case TypeNone: 403 return 404 case TypeRoot: 405 count++ 406 default: 407 panic(typ) 408 } 409 } 410 } 411 412 func BenchmarkNdjsonWarmCountStar(b *testing.B) { 413 if !SupportedCPU() { 414 b.SkipNow() 415 } 416 417 ndjson := loadFile("testdata/parking-citations-1M.json.zst") 418 419 pj, err := ParseND(ndjson, nil) 420 if err != nil { 421 b.Fatal(err) 422 } 423 b.SetBytes(int64(len(ndjson))) 424 b.ReportAllocs() 425 b.ResetTimer() 426 427 for i := 0; i < b.N; i++ { 428 countObjects(*pj) 429 } 430 } 431 432 func BenchmarkNdjsonWarmCountStarWithWhere(b *testing.B) { 433 if !SupportedCPU() { 434 b.SkipNow() 435 } 436 437 ndjson := loadFile("testdata/parking-citations-1M.json.zst") 438 439 pj, err := ParseND(ndjson, nil) 440 if err != nil { 441 b.Fatal(err) 442 } 443 444 b.Run("iter", func(b *testing.B) { 445 b.SetBytes(int64(len(ndjson))) 446 b.ReportAllocs() 447 b.ResetTimer() 448 for i := 0; i < b.N; i++ { 449 countWhere("Make", "HOND", *pj) 450 } 451 }) 452 }