github.com/ledgerwatch/erigon-lib@v1.0.0/state/merge_test.go

package state

import (
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	btree2 "github.com/tidwall/btree"

	"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
)

func TestFindMergeRangeCornerCases(t *testing.T) {
	t.Run("> 2 unmerged files", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-2.ef",
			"test.2-3.ef",
			"test.3-4.ef",
		})
		ii.reCalcRoFiles()

		ic := ii.MakeContext()
		defer ic.Close()

		needMerge, from, to := ii.findMergeRange(4, 32)
		assert.True(t, needMerge)
		assert.Equal(t, 0, int(from))
		assert.Equal(t, 4, int(to))

		idxF, _ := ic.staticFilesInRange(from, to)
		assert.Equal(t, 3, len(idxF))

		ii = &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
			"test.2-3.ef",
			"test.3-4.ef",
		})
		ii.reCalcRoFiles()
		ic = ii.MakeContext()
		defer ic.Close()

		needMerge, from, to = ii.findMergeRange(4, 32)
		assert.True(t, needMerge)
		assert.Equal(t, 0, int(from))
		assert.Equal(t, 2, int(to))

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
			"test.2-3.v",
			"test.3-4.v",
		})
		h.reCalcRoFiles()
		ic = ii.MakeContext()
		defer ic.Close()

		r := h.findMergeRange(4, 32)
		assert.True(t, r.history)
		assert.Equal(t, 2, int(r.historyEndTxNum))
		assert.Equal(t, 2, int(r.indexEndTxNum))
	})
	t.Run("not equal amount of files", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
			"test.2-3.ef",
			"test.3-4.ef",
		})
		ii.reCalcRoFiles()

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()

		r := h.findMergeRange(4, 32)
		assert.True(t, r.index)
		assert.True(t, r.history)
		assert.Equal(t, 0, int(r.historyStartTxNum))
		assert.Equal(t, 2, int(r.historyEndTxNum))
		assert.Equal(t, 2, int(r.indexEndTxNum))
	})
	t.Run("idx merged, history not yet", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-2.ef",
			"test.2-3.ef",
			"test.3-4.ef",
		})
		ii.reCalcRoFiles()

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()

		r := h.findMergeRange(4, 32)
		assert.True(t, r.history)
		assert.False(t, r.index)
		assert.Equal(t, 0, int(r.historyStartTxNum))
		assert.Equal(t, 2, int(r.historyEndTxNum))
	})
	t.Run("idx merged, history not yet, 2", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
			"test.2-3.ef",
			"test.3-4.ef",
			"test.0-4.ef",
		})
		ii.reCalcRoFiles()

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
			"test.2-3.v",
			"test.3-4.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()

		r := h.findMergeRange(4, 32)
		assert.False(t, r.index)
		assert.True(t, r.history)
		assert.Equal(t, 2, int(r.historyEndTxNum))
		idxFiles, histFiles, _, err := hc.staticFilesInRange(r)
		require.NoError(t, err)
		require.Equal(t, 2, len(idxFiles))
		require.Equal(t, 2, len(histFiles))
	})
	t.Run("idx merged and small files lost", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-4.ef",
		})
		ii.reCalcRoFiles()

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
			"test.2-3.v",
			"test.3-4.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()

		r := h.findMergeRange(4, 32)
		assert.False(t, r.index)
		assert.True(t, r.history)
		assert.Equal(t, 2, int(r.historyEndTxNum))
		_, _, _, err := hc.staticFilesInRange(r)
		require.Error(t, err)
	})

	t.Run("history merged, but index not and history garbage left", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
		})
		ii.reCalcRoFiles()

		// `kill -9` may leave small garbage files behind, but if the big merged file already exists we assume it is good (fsynced) and there is no reason to merge again
		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
			"test.0-2.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()

		r := h.findMergeRange(4, 32)
		assert.True(t, r.index)
		assert.False(t, r.history)
		assert.Equal(t, uint64(2), r.indexEndTxNum)
		idxFiles, histFiles, _, err := hc.staticFilesInRange(r)
		require.NoError(t, err)
		require.Equal(t, 2, len(idxFiles))
		require.Equal(t, 0, len(histFiles))
	})
	t.Run("history merge progress ahead of idx", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
			"test.0-2.ef",
			"test.2-3.ef",
			"test.3-4.ef",
		})
		ii.reCalcRoFiles()

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
			"test.0-2.v",
			"test.2-3.v",
			"test.3-4.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()

		r := h.findMergeRange(4, 32)
		assert.True(t, r.index)
		assert.True(t, r.history)
		assert.Equal(t, 4, int(r.indexEndTxNum))
		idxFiles, histFiles, _, err := hc.staticFilesInRange(r)
		require.NoError(t, err)
		require.Equal(t, 3, len(idxFiles))
		require.Equal(t, 3, len(histFiles))
	})
	t.Run("idx merge progress ahead of history", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
			"test.0-2.ef",
			"test.2-3.ef",
		})
		ii.reCalcRoFiles()

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
			"test.2-3.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()

		r := h.findMergeRange(4, 32)
		assert.False(t, r.index)
		assert.True(t, r.history)
		assert.Equal(t, 2, int(r.historyEndTxNum))
		idxFiles, histFiles, _, err := hc.staticFilesInRange(r)
		require.NoError(t, err)
		require.Equal(t, 2, len(idxFiles))
		require.Equal(t, 2, len(histFiles))
	})
	t.Run("idx merged, but garbage left", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
			"test.0-2.ef",
		})
		ii.reCalcRoFiles()

		h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		h.scanStateFiles([]string{
			"test.0-1.v",
			"test.1-2.v",
			"test.0-2.v",
			"test.2-3.v",
		})
		h.reCalcRoFiles()

		hc := h.MakeContext()
		defer hc.Close()
		r := h.findMergeRange(4, 32)
		assert.False(t, r.index)
		assert.False(t, r.history)
	})
	t.Run("idx merged, but garbage left2", func(t *testing.T) {
		ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
		ii.scanStateFiles([]string{
			"test.0-1.ef",
			"test.1-2.ef",
			"test.0-2.ef",
			"test.2-3.ef",
			"test.3-4.ef",
		})
		ii.reCalcRoFiles()
		ic := ii.MakeContext()
		defer ic.Close()
		needMerge, from, to := ii.findMergeRange(4, 32)
		assert.True(t, needMerge)
		require.Equal(t, 0, int(from))
		require.Equal(t, 4, int(to))
		idxFiles, _ := ic.staticFilesInRange(from, to)
		require.Equal(t, 3, len(idxFiles))
	})
}
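// The subtests above all build their fixtures the same way: an InvertedIndex
// (and usually a History) is populated from a list of state-file names such as
// "test.0-2.ef", where the two numbers are the start and end step of the file
// and, with aggregationStep set to 1 in these fixtures, coincide with txNums.
// The helpers below are only an illustrative sketch of how that boilerplate
// could be shared; newTestInvertedIndex and newTestHistory are hypothetical
// names, not part of the upstream API.
func newTestInvertedIndex(fileNames []string) *InvertedIndex {
	ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
	ii.scanStateFiles(fileNames)
	ii.reCalcRoFiles()
	return ii
}

// newTestHistory wraps an InvertedIndex fixture with History files (".v"),
// mirroring the h := &History{...} blocks in the subtests above.
func newTestHistory(ii *InvertedIndex, fileNames []string) *History {
	h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)}
	h.scanStateFiles(fileNames)
	h.reCalcRoFiles()
	return h
}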
func Test_mergeEliasFano(t *testing.T) {
	t.Skip()

	firstList := []int{1, 298164, 298163, 13, 298160, 298159}
	sort.Ints(firstList)
	uniq := make(map[int]struct{})

	first := eliasfano32.NewEliasFano(uint64(len(firstList)), uint64(firstList[len(firstList)-1]))
	for _, v := range firstList {
		uniq[v] = struct{}{}
		first.AddOffset(uint64(v))
	}
	first.Build()
	firstBytes := first.AppendBytes(nil)

	fit := first.Iterator()
	for fit.HasNext() {
		v, _ := fit.Next()
		require.Contains(t, firstList, int(v))
	}

	secondList := []int{
		1, 644951, 644995, 682653, 13,
		644988, 644987, 644946, 644994,
		644942, 644945, 644941, 644940,
		644939, 644938, 644792, 644787}
	sort.Ints(secondList)
	second := eliasfano32.NewEliasFano(uint64(len(secondList)), uint64(secondList[len(secondList)-1]))

	for _, v := range secondList {
		second.AddOffset(uint64(v))
		uniq[v] = struct{}{}
	}
	second.Build()
	secondBytes := second.AppendBytes(nil)

	sit := second.Iterator()
	for sit.HasNext() {
		v, _ := sit.Next()
		require.Contains(t, secondList, int(v))
	}

	menc, err := mergeEfs(firstBytes, secondBytes, nil)
	require.NoError(t, err)

	merged, _ := eliasfano32.ReadEliasFano(menc)
	require.NoError(t, err)
	require.EqualValues(t, len(uniq), merged.Count())
	require.EqualValues(t, merged.Count(), eliasfano32.Count(menc))
	mergedLists := append(firstList, secondList...)
	sort.Ints(mergedLists)
	require.EqualValues(t, mergedLists[len(mergedLists)-1], merged.Max())
	require.EqualValues(t, merged.Max(), eliasfano32.Max(menc))

	mit := merged.Iterator()
	for mit.HasNext() {
		v, _ := mit.Next()
		require.Contains(t, mergedLists, int(v))
	}
}
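// Minimal sketch of the Elias-Fano encode/serialize/decode roundtrip that
// mergeEfs builds on, using only eliasfano32 calls already exercised by
// Test_mergeEliasFano above (NewEliasFano, AddOffset, Build, AppendBytes,
// ReadEliasFano, Count, Max, Iterator). The test name and sample values are
// illustrative, not part of the upstream suite.
func TestEliasFanoRoundTripSketch(t *testing.T) {
	vals := []int{3, 7, 42, 1000}
	sort.Ints(vals) // offsets must be added in ascending order

	ef := eliasfano32.NewEliasFano(uint64(len(vals)), uint64(vals[len(vals)-1]))
	for _, v := range vals {
		ef.AddOffset(uint64(v))
	}
	ef.Build()
	encoded := ef.AppendBytes(nil)

	// Read the serialized form back and check that count, max and the
	// iterated values all survive the roundtrip.
	decoded, _ := eliasfano32.ReadEliasFano(encoded)
	require.EqualValues(t, len(vals), decoded.Count())
	require.EqualValues(t, vals[len(vals)-1], decoded.Max())
	require.EqualValues(t, decoded.Count(), eliasfano32.Count(encoded))
	require.EqualValues(t, decoded.Max(), eliasfano32.Max(encoded))

	it := decoded.Iterator()
	n := 0
	for it.HasNext() {
		v, _ := it.Next()
		require.Contains(t, vals, int(v))
		n++
	}
	require.Equal(t, len(vals), n)
}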