github.com/ledgerwatch/erigon-lib@v1.0.0/state/inverted_index_test.go

/*
   Copyright 2022 Erigon contributors

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package state

import (
	"context"
	"encoding/binary"
	"fmt"
	"math"
	"os"
	"testing"
	"time"

	"github.com/ledgerwatch/erigon-lib/common/background"
	"github.com/ledgerwatch/erigon-lib/kv/iter"
	"github.com/ledgerwatch/erigon-lib/kv/order"
	"github.com/ledgerwatch/log/v3"
	"github.com/stretchr/testify/require"
	btree2 "github.com/tidwall/btree"

	"github.com/ledgerwatch/erigon-lib/kv"
	"github.com/ledgerwatch/erigon-lib/kv/mdbx"
	"github.com/ledgerwatch/erigon-lib/recsplit"
	"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
)

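// testDbAndInvertedIndex creates a temporary in-memory MDBX database with
// DupSort-ed "Keys" and "Index" tables plus an InvertedIndex with the given
// aggregation step. Fsync is disabled, and cleanup runs when the test ends.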
func testDbAndInvertedIndex(tb testing.TB, aggStep uint64, logger log.Logger) (string, kv.RwDB, *InvertedIndex) {
	tb.Helper()
	path := tb.TempDir()
	tb.Cleanup(func() { os.RemoveAll(path) })
	keysTable := "Keys"
	indexTable := "Index"
	db := mdbx.NewMDBX(logger).InMem(path).WithTableCfg(func(defaultBuckets kv.TableCfg) kv.TableCfg {
		return kv.TableCfg{
			keysTable:  kv.TableCfgItem{Flags: kv.DupSort},
			indexTable: kv.TableCfgItem{Flags: kv.DupSort},
		}
	}).MustOpen()
	tb.Cleanup(db.Close)
	ii, err := NewInvertedIndex(path, path, aggStep, "inv" /* filenameBase */, keysTable, indexTable, false, nil, logger)
	require.NoError(tb, err)
	ii.DisableFsync()
	tb.Cleanup(ii.Close)
	return path, db, ii
}

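// TestInvIndexCollationBuild adds keys at txNums 2, 3 and 6, collates the
// [0,7) range, and checks that buildFiles produces a file pairing each key
// with the list of txNums it was seen at, findable via the recsplit index.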
func TestInvIndexCollationBuild(t *testing.T) {
	logger := log.New()
	logEvery := time.NewTicker(30 * time.Second)
	defer logEvery.Stop()
	_, db, ii := testDbAndInvertedIndex(t, 16, logger)
	ctx := context.Background()
	tx, err := db.BeginRw(ctx)
	require.NoError(t, err)
	defer tx.Rollback()
	ii.SetTx(tx)
	ii.StartWrites()
	defer ii.FinishWrites()

	ii.SetTxNum(2)
	err = ii.Add([]byte("key1"))
	require.NoError(t, err)

	ii.SetTxNum(3)
	err = ii.Add([]byte("key2"))
	require.NoError(t, err)

	ii.SetTxNum(6)
	err = ii.Add([]byte("key1"))
	require.NoError(t, err)
	err = ii.Add([]byte("key3"))
	require.NoError(t, err)

	err = ii.Rotate().Flush(ctx, tx)
	require.NoError(t, err)
	err = tx.Commit()
	require.NoError(t, err)

	roTx, err := db.BeginRo(ctx)
	require.NoError(t, err)
	defer roTx.Rollback()

	bs, err := ii.collate(ctx, 0, 7, roTx)
	require.NoError(t, err)
	require.Equal(t, 3, len(bs))
	require.Equal(t, []uint64{3}, bs["key2"].ToArray())
	require.Equal(t, []uint64{2, 6}, bs["key1"].ToArray())
	require.Equal(t, []uint64{6}, bs["key3"].ToArray())

	sf, err := ii.buildFiles(ctx, 0, bs, background.NewProgressSet())
	require.NoError(t, err)
	defer sf.Close()

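	// The built file stores (key, Elias-Fano encoded txNum list) pairs;
	// walk them with the decompressor's getter and decode each list.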
	g := sf.decomp.MakeGetter()
	g.Reset(0)
	var words []string
	var intArrs [][]uint64
	for g.HasNext() {
		w, _ := g.Next(nil)
		words = append(words, string(w))
		w, _ = g.Next(w[:0])
		ef, _ := eliasfano32.ReadEliasFano(w)
		var ints []uint64
		it := ef.Iterator()
		for it.HasNext() {
			v, _ := it.Next()
			ints = append(ints, v)
		}
		intArrs = append(intArrs, ints)
	}
	require.Equal(t, []string{"key1", "key2", "key3"}, words)
	require.Equal(t, [][]uint64{{2, 6}, {3}, {6}}, intArrs)
	r := recsplit.NewIndexReader(sf.index)
	for i := 0; i < len(words); i++ {
		offset := r.Lookup([]byte(words[i]))
		g.Reset(offset)
		w, _ := g.Next(nil)
		require.Equal(t, words[i], string(w))
	}
}

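// TestInvIndexAfterPrune checks that after collating, building files for, and
// pruning the [0,16) range, both the keys table and the index table are empty.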
func TestInvIndexAfterPrune(t *testing.T) {
	logger := log.New()
	logEvery := time.NewTicker(30 * time.Second)
	defer logEvery.Stop()
	_, db, ii := testDbAndInvertedIndex(t, 16, logger)
	ctx := context.Background()
	tx, err := db.BeginRw(ctx)
	require.NoError(t, err)
	defer func() {
		if tx != nil {
			tx.Rollback()
		}
	}()
	ii.SetTx(tx)
	ii.StartWrites()
	defer ii.FinishWrites()

	ii.SetTxNum(2)
	err = ii.Add([]byte("key1"))
	require.NoError(t, err)

	ii.SetTxNum(3)
	err = ii.Add([]byte("key2"))
	require.NoError(t, err)

	ii.SetTxNum(6)
	err = ii.Add([]byte("key1"))
	require.NoError(t, err)
	err = ii.Add([]byte("key3"))
	require.NoError(t, err)

	err = ii.Rotate().Flush(ctx, tx)
	require.NoError(t, err)
	err = tx.Commit()
	require.NoError(t, err)

	roTx, err := db.BeginRo(ctx)
	require.NoError(t, err)
	defer roTx.Rollback()

	bs, err := ii.collate(ctx, 0, 16, roTx)
	require.NoError(t, err)

	sf, err := ii.buildFiles(ctx, 0, bs, background.NewProgressSet())
	require.NoError(t, err)

	tx, err = db.BeginRw(ctx)
	require.NoError(t, err)
	ii.SetTx(tx)

	ii.integrateFiles(sf, 0, 16)

	err = ii.prune(ctx, 0, 16, math.MaxUint64, logEvery)
	require.NoError(t, err)
	err = tx.Commit()
	require.NoError(t, err)
	tx, err = db.BeginRw(ctx)
	require.NoError(t, err)
	ii.SetTx(tx)

	for _, table := range []string{ii.indexKeysTable, ii.indexTable} {
		var cur kv.Cursor
		cur, err = tx.Cursor(table)
		require.NoError(t, err)
		defer cur.Close()
		var k []byte
		k, _, err = cur.First()
		require.NoError(t, err)
		require.Nil(t, k, table)
	}
}

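// filledInvIndex builds an inverted index over 1000 transactions with
// aggregation step 16 and keys 1..31 (see filledInvIndexOfSize).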
func filledInvIndex(tb testing.TB, logger log.Logger) (string, kv.RwDB, *InvertedIndex, uint64) {
	tb.Helper()
	return filledInvIndexOfSize(tb, uint64(1000), 16, 31, logger)
}

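// filledInvIndexOfSize writes txs transactions into a fresh inverted index:
// key keyNum (big-endian uint64, 1 <= keyNum <= module) is added at every
// txNum divisible by keyNum. Writes are flushed in batches of 10 txNums.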
func filledInvIndexOfSize(tb testing.TB, txs, aggStep, module uint64, logger log.Logger) (string, kv.RwDB, *InvertedIndex, uint64) {
	tb.Helper()
	path, db, ii := testDbAndInvertedIndex(tb, aggStep, logger)
	ctx, require := context.Background(), require.New(tb)
	tx, err := db.BeginRw(ctx)
	require.NoError(err)
	defer tx.Rollback()
	ii.SetTx(tx)
	ii.StartWrites()
	defer ii.FinishWrites()

	var flusher flusher

	// keys are big-endian encodings of the numbers 1..module;
	// each key is added at every txNum that is a multiple of that key
	for txNum := uint64(1); txNum <= txs; txNum++ {
		ii.SetTxNum(txNum)
		for keyNum := uint64(1); keyNum <= module; keyNum++ {
			if txNum%keyNum == 0 {
				var k [8]byte
				binary.BigEndian.PutUint64(k[:], keyNum)
				err = ii.Add(k[:])
				require.NoError(err)
			}
		}
		if flusher != nil {
			require.NoError(flusher.Flush(ctx, tx))
		}
		if txNum%10 == 0 {
			flusher = ii.Rotate()
		}
	}
	if flusher != nil {
		require.NoError(flusher.Flush(ctx, tx))
	}
	err = ii.Rotate().Flush(ctx, tx)
	require.NoError(err)
	err = tx.Commit()
	require.NoError(err)
	return path, db, ii, txs
}

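// checkRanges verifies IdxRange against the arithmetic pattern laid down by
// filledInvIndex. With txs=1000 and aggregationStep=16, callers leave the
// last two steps un-collated, so the built files cover txNums [0,976)
// (976 = 61*16): ranges below 976 are served from files alone, while the
// second loop passes roTx so the tail beyond 976 is read from the DB.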
func checkRanges(t *testing.T, db kv.RwDB, ii *InvertedIndex, txs uint64) {
	t.Helper()
	ctx := context.Background()
	ic := ii.MakeContext()
	defer ic.Close()

	// Check the iterator ranges first without roTx
	for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ {
		var k [8]byte
		binary.BigEndian.PutUint64(k[:], keyNum)
		var values []uint64
		t.Run("asc", func(t *testing.T) {
			it, err := ic.IdxRange(k[:], 0, 976, order.Asc, -1, nil)
			require.NoError(t, err)
			for i := keyNum; i < 976; i += keyNum {
				label := fmt.Sprintf("keyNum=%d, txNum=%d", keyNum, i)
				require.True(t, it.HasNext(), label)
				n, err := it.Next()
				require.NoError(t, err)
				require.Equal(t, i, n, label)
				values = append(values, n)
			}
			require.False(t, it.HasNext())
		})

		t.Run("desc", func(t *testing.T) {
			reverseStream, err := ic.IdxRange(k[:], 976-1, 0, order.Desc, -1, nil)
			require.NoError(t, err)
			iter.ExpectEqualU64(t, iter.ReverseArray(values), reverseStream)
		})
		t.Run("unbounded asc", func(t *testing.T) {
			forwardLimited, err := ic.IdxRange(k[:], -1, 976, order.Asc, 2, nil)
			require.NoError(t, err)
			iter.ExpectEqualU64(t, iter.Array(values[:2]), forwardLimited)
		})
		t.Run("unbounded desc", func(t *testing.T) {
			reverseLimited, err := ic.IdxRange(k[:], 976-1, -1, order.Desc, 2, nil)
			require.NoError(t, err)
			iter.ExpectEqualU64(t, iter.ReverseArray(values[len(values)-2:]), reverseLimited)
		})
		t.Run("tiny bound asc", func(t *testing.T) {
			it, err := ic.IdxRange(k[:], 100, 102, order.Asc, -1, nil)
			require.NoError(t, err)
			expect := iter.FilterU64(iter.Array(values), func(k uint64) bool { return k >= 100 && k < 102 })
			iter.ExpectEqualU64(t, expect, it)
		})
		t.Run("tiny bound desc", func(t *testing.T) {
			it, err := ic.IdxRange(k[:], 102, 100, order.Desc, -1, nil)
			require.NoError(t, err)
			expect := iter.FilterU64(iter.ReverseArray(values), func(k uint64) bool { return k <= 102 && k > 100 })
			iter.ExpectEqualU64(t, expect, it)
		})
	}
	// Now check ranges that require access to DB
	roTx, err := db.BeginRo(ctx)
	require.NoError(t, err)
	defer roTx.Rollback()
	for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ {
		var k [8]byte
		binary.BigEndian.PutUint64(k[:], keyNum)
		it, err := ic.IdxRange(k[:], 400, 1000, order.Asc, -1, roTx)
		require.NoError(t, err)
		var values []uint64
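		// Start at the smallest multiple of keyNum that is >= 400.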
		for i := keyNum * ((400 + keyNum - 1) / keyNum); i < txs; i += keyNum {
			label := fmt.Sprintf("keyNum=%d, txNum=%d", keyNum, i)
			require.True(t, it.HasNext(), label)
			n, err := it.Next()
			require.NoError(t, err)
			require.Equal(t, i, n, label)
			values = append(values, n)
		}
		require.False(t, it.HasNext())

		reverseStream, err := ic.IdxRange(k[:], 1000-1, 400-1, order.Desc, -1, roTx)
		require.NoError(t, err)
		arr := iter.ToArrU64Must(reverseStream)
		expect := iter.ToArrU64Must(iter.ReverseArray(values))
		require.Equal(t, expect, arr)
	}
}

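// mergeInverted collates and builds files for all but the last two
// aggregation steps, prunes the DB, then repeatedly merges files until
// findMergeRange reports no more candidates.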
func mergeInverted(tb testing.TB, db kv.RwDB, ii *InvertedIndex, txs uint64) {
	tb.Helper()
	logEvery := time.NewTicker(30 * time.Second)
	defer logEvery.Stop()
	ctx := context.Background()
	tx, err := db.BeginRw(ctx)
	require.NoError(tb, err)
	defer tx.Rollback()
	ii.SetTx(tx)

	// Leave the last 2 aggregation steps un-collated
	for step := uint64(0); step < txs/ii.aggregationStep-1; step++ {
		func() {
			bs, err := ii.collate(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, tx)
			require.NoError(tb, err)
			sf, err := ii.buildFiles(ctx, step, bs, background.NewProgressSet())
			require.NoError(tb, err)
			ii.integrateFiles(sf, step*ii.aggregationStep, (step+1)*ii.aggregationStep)
			err = ii.prune(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, math.MaxUint64, logEvery)
			require.NoError(tb, err)
			var found bool
			var startTxNum, endTxNum uint64
			maxEndTxNum := ii.endTxNumMinimax()
			maxSpan := ii.aggregationStep * StepsInBiggestFile

			for {
				if stop := func() bool {
					ic := ii.MakeContext()
					defer ic.Close()
					found, startTxNum, endTxNum = ii.findMergeRange(maxEndTxNum, maxSpan)
					if !found {
						return true
					}
					outs, _ := ic.staticFilesInRange(startTxNum, endTxNum)
					in, err := ii.mergeFiles(ctx, outs, startTxNum, endTxNum, 1, background.NewProgressSet())
					require.NoError(tb, err)
					ii.integrateMergedFiles(outs, in)
					return false
				}(); stop {
					break
				}
			}
		}()
	}
	err = tx.Commit()
	require.NoError(tb, err)
}

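// TestInvIndexRanges collates and prunes all but the last two aggregation
// steps (without merging) and verifies IdxRange over both files and DB.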
func TestInvIndexRanges(t *testing.T) {
	logger := log.New()
	logEvery := time.NewTicker(30 * time.Second)
	defer logEvery.Stop()
	_, db, ii, txs := filledInvIndex(t, logger)
	ctx := context.Background()
	tx, err := db.BeginRw(ctx)
	require.NoError(t, err)
	defer tx.Rollback()
	ii.SetTx(tx)

	// Leave the last 2 aggregation steps un-collated
	for step := uint64(0); step < txs/ii.aggregationStep-1; step++ {
		func() {
			bs, err := ii.collate(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, tx)
			require.NoError(t, err)
			sf, err := ii.buildFiles(ctx, step, bs, background.NewProgressSet())
			require.NoError(t, err)
			ii.integrateFiles(sf, step*ii.aggregationStep, (step+1)*ii.aggregationStep)
			err = ii.prune(ctx, step*ii.aggregationStep, (step+1)*ii.aggregationStep, math.MaxUint64, logEvery)
			require.NoError(t, err)
		}()
	}
	err = tx.Commit()
	require.NoError(t, err)

	checkRanges(t, db, ii, txs)
}

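// TestInvIndexMerge verifies IdxRange after files have been merged.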
func TestInvIndexMerge(t *testing.T) {
	logger := log.New()
	_, db, ii, txs := filledInvIndex(t, logger)

	mergeInverted(t, db, ii, txs)
	checkRanges(t, db, ii, txs)
}

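// TestInvIndexScanFiles re-opens the InvertedIndex over the existing
// directory so the state files are discovered by scanning, then re-checks
// merge behaviour and ranges.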
func TestInvIndexScanFiles(t *testing.T) {
	logger := log.New()
	path, db, ii, txs := filledInvIndex(t, logger)

	// Recreate InvertedIndex to scan the files
	var err error
	ii, err = NewInvertedIndex(path, path, ii.aggregationStep, ii.filenameBase, ii.indexKeysTable, ii.indexTable, false, nil, logger)
	require.NoError(t, err)
	defer ii.Close()

	mergeInverted(t, db, ii, txs)
	checkRanges(t, db, ii, txs)
}

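// TestChangedKeysIterator checks IterateChangedKeys over two txNum ranges:
// [0,20), where keys 1..19 all appear, and [995,1000), where only keys
// dividing some txNum in 995..999 appear.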
func TestChangedKeysIterator(t *testing.T) {
	logger := log.New()
	_, db, ii, txs := filledInvIndex(t, logger)
	ctx := context.Background()
	mergeInverted(t, db, ii, txs)
	roTx, err := db.BeginRo(ctx)
	require.NoError(t, err)
	defer func() {
		roTx.Rollback()
	}()
	ic := ii.MakeContext()
	defer ic.Close()
	it := ic.IterateChangedKeys(0, 20, roTx)
	defer func() {
		it.Close()
	}()
	var keys []string
	for it.HasNext() {
		k := it.Next(nil)
		keys = append(keys, fmt.Sprintf("%x", k))
	}
	it.Close()
	require.Equal(t, []string{
		"0000000000000001",
		"0000000000000002",
		"0000000000000003",
		"0000000000000004",
		"0000000000000005",
		"0000000000000006",
		"0000000000000007",
		"0000000000000008",
		"0000000000000009",
		"000000000000000a",
		"000000000000000b",
		"000000000000000c",
		"000000000000000d",
		"000000000000000e",
		"000000000000000f",
		"0000000000000010",
		"0000000000000011",
		"0000000000000012",
		"0000000000000013"}, keys)
	it = ic.IterateChangedKeys(995, 1000, roTx)
	keys = keys[:0]
	for it.HasNext() {
		k := it.Next(nil)
		keys = append(keys, fmt.Sprintf("%x", k))
	}
	it.Close()
	require.Equal(t, []string{
		"0000000000000001",
		"0000000000000002",
		"0000000000000003",
		"0000000000000004",
		"0000000000000005",
		"0000000000000006",
		"0000000000000009",
		"000000000000000c",
		"000000000000001b",
	}, keys)
}

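// TestScanStaticFiles checks that scanStateFiles registers every .ef file,
// and that once an integrity file extension ("v") is configured, files
// without the companion .v file on disk are skipped entirely.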
func TestScanStaticFiles(t *testing.T) {
	logger := log.New()
	ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1,
		files:  btree2.NewBTreeG[*filesItem](filesItemLess),
		logger: logger,
	}
	files := []string{
		"test.0-1.ef",
		"test.1-2.ef",
		"test.0-4.ef",
		"test.2-3.ef",
		"test.3-4.ef",
		"test.4-5.ef",
	}
	ii.scanStateFiles(files)
	require.Equal(t, 6, ii.files.Len())

	// integrity extension case: with a required companion ".v" extension
	// configured and no such files present, nothing is registered
	ii.files.Clear()
	ii.integrityFileExtensions = []string{"v"}
	ii.scanStateFiles(files)
	require.Equal(t, 0, ii.files.Len())
}

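// TestCtxFiles checks that ctxFiles builds a read-only view containing only
// non-overlapping files: 0-1..3-4 are subsumed by 0-4, and the 480-* series
// collapses into 480-512, leaving 0-4, 4-5 and 480-512.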
func TestCtxFiles(t *testing.T) {
	logger := log.New()
	ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1,
		files:  btree2.NewBTreeG[*filesItem](filesItemLess),
		logger: logger,
	}
	files := []string{
		"test.0-1.ef", // overlap with same `endTxNum=4`
		"test.1-2.ef",
		"test.0-4.ef",
		"test.2-3.ef",
		"test.3-4.ef",
		"test.4-5.ef",     // no overlap
		"test.480-484.ef", // overlap with same `startTxNum=480`
		"test.480-488.ef",
		"test.480-496.ef",
		"test.480-512.ef",
	}
	ii.scanStateFiles(files)
	require.Equal(t, 10, ii.files.Len())

	roFiles := ctxFiles(ii.files)
	for i, item := range roFiles {
		if item.src.canDelete.Load() {
			require.Failf(t, "deleted file", "%d-%d", item.src.startTxNum, item.src.endTxNum)
		}
		if i == 0 {
			continue
		}
		if item.src.isSubsetOf(roFiles[i-1].src) || roFiles[i-1].src.isSubsetOf(item.src) {
			require.Failf(t, "overlapping files", "%d-%d, %d-%d", item.src.startTxNum, item.src.endTxNum, roFiles[i-1].src.startTxNum, roFiles[i-1].src.endTxNum)
		}
	}
	require.Equal(t, 3, len(roFiles))

	require.Equal(t, 0, int(roFiles[0].startTxNum))
	require.Equal(t, 4, int(roFiles[0].endTxNum))

	require.Equal(t, 4, int(roFiles[1].startTxNum))
	require.Equal(t, 5, int(roFiles[1].endTxNum))

	require.Equal(t, 480, int(roFiles[2].startTxNum))
	require.Equal(t, 512, int(roFiles[2].endTxNum))
}