github.com/ledgerwatch/erigon-lib@v1.0.0/state/inverted_index_test.go (about) 1 /* 2 Copyright 2022 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package state 18 19 import ( 20 "context" 21 "encoding/binary" 22 "fmt" 23 "math" 24 "os" 25 "testing" 26 "time" 27 28 "github.com/ledgerwatch/erigon-lib/common/background" 29 "github.com/ledgerwatch/erigon-lib/kv/iter" 30 "github.com/ledgerwatch/erigon-lib/kv/order" 31 "github.com/ledgerwatch/log/v3" 32 "github.com/stretchr/testify/require" 33 btree2 "github.com/tidwall/btree" 34 35 "github.com/ledgerwatch/erigon-lib/kv" 36 "github.com/ledgerwatch/erigon-lib/kv/mdbx" 37 "github.com/ledgerwatch/erigon-lib/recsplit" 38 "github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32" 39 ) 40 41 func testDbAndInvertedIndex(tb testing.TB, aggStep uint64, logger log.Logger) (string, kv.RwDB, *InvertedIndex) { 42 tb.Helper() 43 path := tb.TempDir() 44 tb.Cleanup(func() { os.RemoveAll(path) }) 45 keysTable := "Keys" 46 indexTable := "Index" 47 db := mdbx.NewMDBX(logger).InMem(path).WithTableCfg(func(defaultBuckets kv.TableCfg) kv.TableCfg { 48 return kv.TableCfg{ 49 keysTable: kv.TableCfgItem{Flags: kv.DupSort}, 50 indexTable: kv.TableCfgItem{Flags: kv.DupSort}, 51 } 52 }).MustOpen() 53 tb.Cleanup(db.Close) 54 ii, err := NewInvertedIndex(path, path, aggStep, "inv" /* filenameBase */, keysTable, indexTable, false, nil, logger) 55 require.NoError(tb, err) 56 ii.DisableFsync() 57 tb.Cleanup(ii.Close) 58 return path, db, ii 59 } 60 61 func TestInvIndexCollationBuild(t *testing.T) { 62 logger := log.New() 63 logEvery := time.NewTicker(30 * time.Second) 64 defer logEvery.Stop() 65 _, db, ii := testDbAndInvertedIndex(t, 16, logger) 66 ctx := context.Background() 67 tx, err := db.BeginRw(ctx) 68 require.NoError(t, err) 69 defer tx.Rollback() 70 ii.SetTx(tx) 71 ii.StartWrites() 72 defer ii.FinishWrites() 73 74 ii.SetTxNum(2) 75 err = ii.Add([]byte("key1")) 76 require.NoError(t, err) 77 78 ii.SetTxNum(3) 79 err = ii.Add([]byte("key2")) 80 require.NoError(t, err) 81 82 ii.SetTxNum(6) 83 err = ii.Add([]byte("key1")) 84 require.NoError(t, err) 85 err = ii.Add([]byte("key3")) 86 require.NoError(t, err) 87 88 err = ii.Rotate().Flush(ctx, tx) 89 require.NoError(t, err) 90 err = tx.Commit() 91 require.NoError(t, err) 92 93 roTx, err := db.BeginRo(ctx) 94 require.NoError(t, err) 95 defer roTx.Rollback() 96 97 bs, err := ii.collate(ctx, 0, 7, roTx) 98 require.NoError(t, err) 99 require.Equal(t, 3, len(bs)) 100 require.Equal(t, []uint64{3}, bs["key2"].ToArray()) 101 require.Equal(t, []uint64{2, 6}, bs["key1"].ToArray()) 102 require.Equal(t, []uint64{6}, bs["key3"].ToArray()) 103 104 sf, err := ii.buildFiles(ctx, 0, bs, background.NewProgressSet()) 105 require.NoError(t, err) 106 defer sf.Close() 107 108 g := sf.decomp.MakeGetter() 109 g.Reset(0) 110 var words []string 111 var intArrs [][]uint64 112 for g.HasNext() { 113 w, _ := g.Next(nil) 114 words = append(words, string(w)) 115 w, _ = g.Next(w[:0]) 116 ef, _ := eliasfano32.ReadEliasFano(w) 117 var ints []uint64 118 it := ef.Iterator() 119 for it.HasNext() { 120 v, _ := it.Next() 121 ints = append(ints, v) 122 } 123 intArrs = append(intArrs, ints) 124 } 125 require.Equal(t, []string{"key1", "key2", "key3"}, words) 126 require.Equal(t, [][]uint64{{2, 6}, {3}, {6}}, intArrs) 127 r := recsplit.NewIndexReader(sf.index) 128 for i := 0; i < len(words); i++ { 129 offset := r.Lookup([]byte(words[i])) 130 g.Reset(offset) 131 w, _ := g.Next(nil) 132 require.Equal(t, words[i], string(w)) 133 } 134 } 135 136 func TestInvIndexAfterPrune(t *testing.T) { 137 logger := log.New() 138 logEvery := time.NewTicker(30 * time.Second) 139 defer logEvery.Stop() 140 _, db, ii := testDbAndInvertedIndex(t, 16, logger) 141 ctx := context.Background() 142 tx, err := db.BeginRw(ctx) 143 require.NoError(t, err) 144 defer func() { 145 if tx != nil { 146 tx.Rollback() 147 } 148 }() 149 ii.SetTx(tx) 150 ii.StartWrites() 151 defer ii.FinishWrites() 152 153 ii.SetTxNum(2) 154 err = ii.Add([]byte("key1")) 155 require.NoError(t, err) 156 157 ii.SetTxNum(3) 158 err = ii.Add([]byte("key2")) 159 require.NoError(t, err) 160 161 ii.SetTxNum(6) 162 err = ii.Add([]byte("key1")) 163 require.NoError(t, err) 164 err = ii.Add([]byte("key3")) 165 require.NoError(t, err) 166 167 err = ii.Rotate().Flush(ctx, tx) 168 require.NoError(t, err) 169 err = tx.Commit() 170 require.NoError(t, err) 171 172 roTx, err := db.BeginRo(ctx) 173 require.NoError(t, err) 174 defer roTx.Rollback() 175 176 bs, err := ii.collate(ctx, 0, 16, roTx) 177 require.NoError(t, err) 178 179 sf, err := ii.buildFiles(ctx, 0, bs, background.NewProgressSet()) 180 require.NoError(t, err) 181 182 tx, err = db.BeginRw(ctx) 183 require.NoError(t, err) 184 ii.SetTx(tx) 185 186 ii.integrateFiles(sf, 0, 16) 187 188 err = ii.prune(ctx, 0, 16, math.MaxUint64, logEvery) 189 require.NoError(t, err) 190 err = tx.Commit() 191 require.NoError(t, err) 192 tx, err = db.BeginRw(ctx) 193 require.NoError(t, err) 194 ii.SetTx(tx) 195 196 for _, table := range []string{ii.indexKeysTable, ii.indexTable} { 197 var cur kv.Cursor 198 cur, err = tx.Cursor(table) 199 require.NoError(t, err) 200 defer cur.Close() 201 var k []byte 202 k, _, err = cur.First() 203 require.NoError(t, err) 204 require.Nil(t, k, table) 205 } 206 } 207 208 func filledInvIndex(tb testing.TB, logger log.Logger) (string, kv.RwDB, *InvertedIndex, uint64) { 209 tb.Helper() 210 return filledInvIndexOfSize(tb, uint64(1000), 16, 31, logger) 211 } 212 213 func filledInvIndexOfSize(tb testing.TB, txs, aggStep, module uint64, logger log.Logger) (string, kv.RwDB, *InvertedIndex, uint64) { 214 tb.Helper() 215 path, db, ii := testDbAndInvertedIndex(tb, aggStep, logger) 216 ctx, require := context.Background(), require.New(tb) 217 tx, err := db.BeginRw(ctx) 218 require.NoError(err) 219 defer tx.Rollback() 220 ii.SetTx(tx) 221 ii.StartWrites() 222 defer ii.FinishWrites() 223 224 var flusher flusher 225 226 // keys are encodings of numbers 1..31 227 // each key changes value on every txNum which is multiple of the key 228 for txNum := uint64(1); txNum <= txs; txNum++ { 229 ii.SetTxNum(txNum) 230 for keyNum := uint64(1); keyNum <= module; keyNum++ { 231 if txNum%keyNum == 0 { 232 var k [8]byte 233 binary.BigEndian.PutUint64(k[:], keyNum) 234 err = ii.Add(k[:]) 235 require.NoError(err) 236 } 237 } 238 if flusher != nil { 239 require.NoError(flusher.Flush(ctx, tx)) 240 } 241 if txNum%10 == 0 { 242 flusher = ii.Rotate() 243 } 244 } 245 if flusher != nil { 246 require.NoError(flusher.Flush(ctx, tx)) 247 } 248 err = ii.Rotate().Flush(ctx, tx) 249 require.NoError(err) 250 err = tx.Commit() 251 require.NoError(err) 252 return path, db, ii, txs 253 } 254 255 func checkRanges(t *testing.T, db kv.RwDB, ii *InvertedIndex, txs uint64) { 256 t.Helper() 257 ctx := context.Background() 258 ic := ii.MakeContext() 259 defer ic.Close() 260 261 // Check the iterator ranges first without roTx 262 for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ { 263 var k [8]byte 264 binary.BigEndian.PutUint64(k[:], keyNum) 265 var values []uint64 266 t.Run("asc", func(t *testing.T) { 267 it, err := ic.IdxRange(k[:], 0, 976, order.Asc, -1, nil) 268 require.NoError(t, err) 269 for i := keyNum; i < 976; i += keyNum { 270 label := fmt.Sprintf("keyNum=%d, txNum=%d", keyNum, i) 271 require.True(t, it.HasNext(), label) 272 n, err := it.Next() 273 require.NoError(t, err) 274 require.Equal(t, i, n, label) 275 values = append(values, n) 276 } 277 require.False(t, it.HasNext()) 278 }) 279 280 t.Run("desc", func(t *testing.T) { 281 reverseStream, err := ic.IdxRange(k[:], 976-1, 0, order.Desc, -1, nil) 282 require.NoError(t, err) 283 iter.ExpectEqualU64(t, iter.ReverseArray(values), reverseStream) 284 }) 285 t.Run("unbounded asc", func(t *testing.T) { 286 forwardLimited, err := ic.IdxRange(k[:], -1, 976, order.Asc, 2, nil) 287 require.NoError(t, err) 288 iter.ExpectEqualU64(t, iter.Array(values[:2]), forwardLimited) 289 }) 290 t.Run("unbounded desc", func(t *testing.T) { 291 reverseLimited, err := ic.IdxRange(k[:], 976-1, -1, order.Desc, 2, nil) 292 require.NoError(t, err) 293 iter.ExpectEqualU64(t, iter.ReverseArray(values[len(values)-2:]), reverseLimited) 294 }) 295 t.Run("tiny bound asc", func(t *testing.T) { 296 it, err := ic.IdxRange(k[:], 100, 102, order.Asc, -1, nil) 297 require.NoError(t, err) 298 expect := iter.FilterU64(iter.Array(values), func(k uint64) bool { return k >= 100 && k < 102 }) 299 iter.ExpectEqualU64(t, expect, it) 300 }) 301 t.Run("tiny bound desc", func(t *testing.T) { 302 it, err := ic.IdxRange(k[:], 102, 100, order.Desc, -1, nil) 303 require.NoError(t, err) 304 expect := iter.FilterU64(iter.ReverseArray(values), func(k uint64) bool { return k <= 102 && k > 100 }) 305 iter.ExpectEqualU64(t, expect, it) 306 }) 307 } 308 // Now check ranges that require access to DB 309 roTx, err := db.BeginRo(ctx) 310 require.NoError(t, err) 311 defer roTx.Rollback() 312 for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ { 313 var k [8]byte 314 binary.BigEndian.PutUint64(k[:], keyNum) 315 it, err := ic.IdxRange(k[:], 400, 1000, true, -1, roTx) 316 require.NoError(t, err) 317 var values []uint64 318 for i := keyNum * ((400 + keyNum - 1) / keyNum); i < txs; i += keyNum { 319 label := fmt.Sprintf("keyNum=%d, txNum=%d", keyNum, i) 320 require.True(t, it.HasNext(), label) 321 n, err := it.Next() 322 require.NoError(t, err) 323 require.Equal(t, i, n, label) 324 values = append(values, n) 325 } 326 require.False(t, it.HasNext()) 327 328 reverseStream, err := ic.IdxRange(k[:], 1000-1, 400-1, false, -1, roTx) 329 require.NoError(t, err) 330 arr := iter.ToArrU64Must(reverseStream) 331 expect := iter.ToArrU64Must(iter.ReverseArray(values)) 332 require.Equal(t, expect, arr) 333 } 334 } 335 336 func mergeInverted(tb testing.TB, db kv.RwDB, ii *InvertedIndex, txs uint64) { 337 tb.Helper() 338 logEvery := time.NewTicker(30 * time.Second) 339 defer logEvery.Stop() 340 ctx := context.Background() 341 // Leave the last 2 aggregation steps un-collated 342 tx, err := db.BeginRw(ctx) 343 require.NoError(tb, err) 344 defer tx.Rollback() 345 ii.SetTx(tx) 346 347 // Leave the last 2 aggregation steps un-collated 348 for step := uint64(0); step < txs/ii.aggregationStep-1; step++ { 349 func() { 350 bs, err := ii.collate(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, tx) 351 require.NoError(tb, err) 352 sf, err := ii.buildFiles(ctx, step, bs, background.NewProgressSet()) 353 require.NoError(tb, err) 354 ii.integrateFiles(sf, step*ii.aggregationStep, (step+1)*ii.aggregationStep) 355 err = ii.prune(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, math.MaxUint64, logEvery) 356 require.NoError(tb, err) 357 var found bool 358 var startTxNum, endTxNum uint64 359 maxEndTxNum := ii.endTxNumMinimax() 360 maxSpan := ii.aggregationStep * StepsInBiggestFile 361 362 for { 363 if stop := func() bool { 364 ic := ii.MakeContext() 365 defer ic.Close() 366 found, startTxNum, endTxNum = ii.findMergeRange(maxEndTxNum, maxSpan) 367 if !found { 368 return true 369 } 370 outs, _ := ic.staticFilesInRange(startTxNum, endTxNum) 371 in, err := ii.mergeFiles(ctx, outs, startTxNum, endTxNum, 1, background.NewProgressSet()) 372 require.NoError(tb, err) 373 ii.integrateMergedFiles(outs, in) 374 require.NoError(tb, err) 375 return false 376 }(); stop { 377 break 378 } 379 } 380 }() 381 } 382 err = tx.Commit() 383 require.NoError(tb, err) 384 } 385 386 func TestInvIndexRanges(t *testing.T) { 387 logger := log.New() 388 logEvery := time.NewTicker(30 * time.Second) 389 defer logEvery.Stop() 390 _, db, ii, txs := filledInvIndex(t, logger) 391 ctx := context.Background() 392 tx, err := db.BeginRw(ctx) 393 require.NoError(t, err) 394 defer tx.Rollback() 395 ii.SetTx(tx) 396 397 // Leave the last 2 aggregation steps un-collated 398 for step := uint64(0); step < txs/ii.aggregationStep-1; step++ { 399 func() { 400 bs, err := ii.collate(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, tx) 401 require.NoError(t, err) 402 sf, err := ii.buildFiles(ctx, step, bs, background.NewProgressSet()) 403 require.NoError(t, err) 404 ii.integrateFiles(sf, step*ii.aggregationStep, (step+1)*ii.aggregationStep) 405 err = ii.prune(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, math.MaxUint64, logEvery) 406 require.NoError(t, err) 407 }() 408 } 409 err = tx.Commit() 410 require.NoError(t, err) 411 412 checkRanges(t, db, ii, txs) 413 } 414 415 func TestInvIndexMerge(t *testing.T) { 416 logger := log.New() 417 _, db, ii, txs := filledInvIndex(t, logger) 418 419 mergeInverted(t, db, ii, txs) 420 checkRanges(t, db, ii, txs) 421 } 422 423 func TestInvIndexScanFiles(t *testing.T) { 424 logger := log.New() 425 path, db, ii, txs := filledInvIndex(t, logger) 426 427 // Recreate InvertedIndex to scan the files 428 var err error 429 ii, err = NewInvertedIndex(path, path, ii.aggregationStep, ii.filenameBase, ii.indexKeysTable, ii.indexTable, false, nil, logger) 430 require.NoError(t, err) 431 defer ii.Close() 432 433 mergeInverted(t, db, ii, txs) 434 checkRanges(t, db, ii, txs) 435 } 436 437 func TestChangedKeysIterator(t *testing.T) { 438 logger := log.New() 439 _, db, ii, txs := filledInvIndex(t, logger) 440 ctx := context.Background() 441 mergeInverted(t, db, ii, txs) 442 roTx, err := db.BeginRo(ctx) 443 require.NoError(t, err) 444 defer func() { 445 roTx.Rollback() 446 }() 447 ic := ii.MakeContext() 448 defer ic.Close() 449 it := ic.IterateChangedKeys(0, 20, roTx) 450 defer func() { 451 it.Close() 452 }() 453 var keys []string 454 for it.HasNext() { 455 k := it.Next(nil) 456 keys = append(keys, fmt.Sprintf("%x", k)) 457 } 458 it.Close() 459 require.Equal(t, []string{ 460 "0000000000000001", 461 "0000000000000002", 462 "0000000000000003", 463 "0000000000000004", 464 "0000000000000005", 465 "0000000000000006", 466 "0000000000000007", 467 "0000000000000008", 468 "0000000000000009", 469 "000000000000000a", 470 "000000000000000b", 471 "000000000000000c", 472 "000000000000000d", 473 "000000000000000e", 474 "000000000000000f", 475 "0000000000000010", 476 "0000000000000011", 477 "0000000000000012", 478 "0000000000000013"}, keys) 479 it = ic.IterateChangedKeys(995, 1000, roTx) 480 keys = keys[:0] 481 for it.HasNext() { 482 k := it.Next(nil) 483 keys = append(keys, fmt.Sprintf("%x", k)) 484 } 485 it.Close() 486 require.Equal(t, []string{ 487 "0000000000000001", 488 "0000000000000002", 489 "0000000000000003", 490 "0000000000000004", 491 "0000000000000005", 492 "0000000000000006", 493 "0000000000000009", 494 "000000000000000c", 495 "000000000000001b", 496 }, keys) 497 } 498 499 func TestScanStaticFiles(t *testing.T) { 500 logger := log.New() 501 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, 502 files: btree2.NewBTreeG[*filesItem](filesItemLess), 503 logger: logger, 504 } 505 files := []string{ 506 "test.0-1.ef", 507 "test.1-2.ef", 508 "test.0-4.ef", 509 "test.2-3.ef", 510 "test.3-4.ef", 511 "test.4-5.ef", 512 } 513 ii.scanStateFiles(files) 514 require.Equal(t, 6, ii.files.Len()) 515 516 //integrity extension case 517 ii.files.Clear() 518 ii.integrityFileExtensions = []string{"v"} 519 ii.scanStateFiles(files) 520 require.Equal(t, 0, ii.files.Len()) 521 } 522 523 func TestCtxFiles(t *testing.T) { 524 logger := log.New() 525 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, 526 files: btree2.NewBTreeG[*filesItem](filesItemLess), 527 logger: logger, 528 } 529 files := []string{ 530 "test.0-1.ef", // overlap with same `endTxNum=4` 531 "test.1-2.ef", 532 "test.0-4.ef", 533 "test.2-3.ef", 534 "test.3-4.ef", 535 "test.4-5.ef", // no overlap 536 "test.480-484.ef", // overlap with same `startTxNum=480` 537 "test.480-488.ef", 538 "test.480-496.ef", 539 "test.480-512.ef", 540 } 541 ii.scanStateFiles(files) 542 require.Equal(t, 10, ii.files.Len()) 543 544 roFiles := ctxFiles(ii.files) 545 for i, item := range roFiles { 546 if item.src.canDelete.Load() { 547 require.Failf(t, "deleted file", "%d-%d", item.src.startTxNum, item.src.endTxNum) 548 } 549 if i == 0 { 550 continue 551 } 552 if item.src.isSubsetOf(roFiles[i-1].src) || roFiles[i-1].src.isSubsetOf(item.src) { 553 require.Failf(t, "overlaping files", "%d-%d, %d-%d", item.src.startTxNum, item.src.endTxNum, roFiles[i-1].src.startTxNum, roFiles[i-1].src.endTxNum) 554 } 555 } 556 require.Equal(t, 3, len(roFiles)) 557 558 require.Equal(t, 0, int(roFiles[0].startTxNum)) 559 require.Equal(t, 4, int(roFiles[0].endTxNum)) 560 561 require.Equal(t, 4, int(roFiles[1].startTxNum)) 562 require.Equal(t, 5, int(roFiles[1].endTxNum)) 563 564 require.Equal(t, 480, int(roFiles[2].startTxNum)) 565 require.Equal(t, 512, int(roFiles[2].endTxNum)) 566 }