github.com/ledgerwatch/erigon-lib@v1.0.0/state/merge_test.go (about) 1 package state 2 3 import ( 4 "sort" 5 "testing" 6 7 "github.com/stretchr/testify/assert" 8 "github.com/stretchr/testify/require" 9 btree2 "github.com/tidwall/btree" 10 11 "github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32" 12 ) 13 14 func TestFindMergeRangeCornerCases(t *testing.T) { 15 t.Run("> 2 unmerged files", func(t *testing.T) { 16 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 17 ii.scanStateFiles([]string{ 18 "test.0-2.ef", 19 "test.2-3.ef", 20 "test.3-4.ef", 21 }) 22 ii.reCalcRoFiles() 23 24 ic := ii.MakeContext() 25 defer ic.Close() 26 27 needMerge, from, to := ii.findMergeRange(4, 32) 28 assert.True(t, needMerge) 29 assert.Equal(t, 0, int(from)) 30 assert.Equal(t, 4, int(to)) 31 32 idxF, _ := ic.staticFilesInRange(from, to) 33 assert.Equal(t, 3, len(idxF)) 34 35 ii = &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 36 ii.scanStateFiles([]string{ 37 "test.0-1.ef", 38 "test.1-2.ef", 39 "test.2-3.ef", 40 "test.3-4.ef", 41 }) 42 ii.reCalcRoFiles() 43 ic = ii.MakeContext() 44 defer ic.Close() 45 46 needMerge, from, to = ii.findMergeRange(4, 32) 47 assert.True(t, needMerge) 48 assert.Equal(t, 0, int(from)) 49 assert.Equal(t, 2, int(to)) 50 51 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 52 h.scanStateFiles([]string{ 53 "test.0-1.v", 54 "test.1-2.v", 55 "test.2-3.v", 56 "test.3-4.v", 57 }) 58 h.reCalcRoFiles() 59 ic = ii.MakeContext() 60 defer ic.Close() 61 62 r := h.findMergeRange(4, 32) 63 assert.True(t, r.history) 64 assert.Equal(t, 2, int(r.historyEndTxNum)) 65 assert.Equal(t, 2, int(r.indexEndTxNum)) 66 }) 67 t.Run("not equal amount of files", func(t *testing.T) { 68 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 69 ii.scanStateFiles([]string{ 70 "test.0-1.ef", 71 "test.1-2.ef", 72 "test.2-3.ef", 73 "test.3-4.ef", 74 }) 75 ii.reCalcRoFiles() 76 77 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 78 h.scanStateFiles([]string{ 79 "test.0-1.v", 80 "test.1-2.v", 81 }) 82 h.reCalcRoFiles() 83 84 hc := h.MakeContext() 85 defer hc.Close() 86 87 r := h.findMergeRange(4, 32) 88 assert.True(t, r.index) 89 assert.True(t, r.history) 90 assert.Equal(t, 0, int(r.historyStartTxNum)) 91 assert.Equal(t, 2, int(r.historyEndTxNum)) 92 assert.Equal(t, 2, int(r.indexEndTxNum)) 93 }) 94 t.Run("idx merged, history not yet", func(t *testing.T) { 95 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 96 ii.scanStateFiles([]string{ 97 "test.0-2.ef", 98 "test.2-3.ef", 99 "test.3-4.ef", 100 }) 101 ii.reCalcRoFiles() 102 103 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 104 h.scanStateFiles([]string{ 105 "test.0-1.v", 106 "test.1-2.v", 107 }) 108 h.reCalcRoFiles() 109 110 hc := h.MakeContext() 111 defer hc.Close() 112 113 r := h.findMergeRange(4, 32) 114 assert.True(t, r.history) 115 assert.False(t, r.index) 116 assert.Equal(t, 0, int(r.historyStartTxNum)) 117 assert.Equal(t, 2, int(r.historyEndTxNum)) 118 }) 119 t.Run("idx merged, history not yet, 2", func(t *testing.T) { 120 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 121 ii.scanStateFiles([]string{ 122 "test.0-1.ef", 123 "test.1-2.ef", 124 "test.2-3.ef", 125 "test.3-4.ef", 126 "test.0-4.ef", 127 }) 128 ii.reCalcRoFiles() 129 130 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 131 h.scanStateFiles([]string{ 132 "test.0-1.v", 133 "test.1-2.v", 134 "test.2-3.v", 135 "test.3-4.v", 136 }) 137 h.reCalcRoFiles() 138 139 hc := h.MakeContext() 140 defer hc.Close() 141 142 r := h.findMergeRange(4, 32) 143 assert.False(t, r.index) 144 assert.True(t, r.history) 145 assert.Equal(t, 2, int(r.historyEndTxNum)) 146 idxFiles, histFiles, _, err := hc.staticFilesInRange(r) 147 require.NoError(t, err) 148 require.Equal(t, 2, len(idxFiles)) 149 require.Equal(t, 2, len(histFiles)) 150 }) 151 t.Run("idx merged and small files lost", func(t *testing.T) { 152 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 153 ii.scanStateFiles([]string{ 154 "test.0-4.ef", 155 }) 156 ii.reCalcRoFiles() 157 158 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 159 h.scanStateFiles([]string{ 160 "test.0-1.v", 161 "test.1-2.v", 162 "test.2-3.v", 163 "test.3-4.v", 164 }) 165 h.reCalcRoFiles() 166 167 hc := h.MakeContext() 168 defer hc.Close() 169 170 r := h.findMergeRange(4, 32) 171 assert.False(t, r.index) 172 assert.True(t, r.history) 173 assert.Equal(t, 2, int(r.historyEndTxNum)) 174 _, _, _, err := hc.staticFilesInRange(r) 175 require.Error(t, err) 176 }) 177 178 t.Run("history merged, but index not and history garbage left", func(t *testing.T) { 179 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 180 ii.scanStateFiles([]string{ 181 "test.0-1.ef", 182 "test.1-2.ef", 183 }) 184 ii.reCalcRoFiles() 185 186 // `kill -9` may leave small garbage files, but if big one already exists we assume it's good(fsynced) and no reason to merge again 187 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 188 h.scanStateFiles([]string{ 189 "test.0-1.v", 190 "test.1-2.v", 191 "test.0-2.v", 192 }) 193 h.reCalcRoFiles() 194 195 hc := h.MakeContext() 196 defer hc.Close() 197 198 r := h.findMergeRange(4, 32) 199 assert.True(t, r.index) 200 assert.False(t, r.history) 201 assert.Equal(t, uint64(2), r.indexEndTxNum) 202 idxFiles, histFiles, _, err := hc.staticFilesInRange(r) 203 require.NoError(t, err) 204 require.Equal(t, 2, len(idxFiles)) 205 require.Equal(t, 0, len(histFiles)) 206 }) 207 t.Run("history merge progress ahead of idx", func(t *testing.T) { 208 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 209 ii.scanStateFiles([]string{ 210 "test.0-1.ef", 211 "test.1-2.ef", 212 "test.0-2.ef", 213 "test.2-3.ef", 214 "test.3-4.ef", 215 }) 216 ii.reCalcRoFiles() 217 218 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 219 h.scanStateFiles([]string{ 220 "test.0-1.v", 221 "test.1-2.v", 222 "test.0-2.v", 223 "test.2-3.v", 224 "test.3-4.v", 225 }) 226 h.reCalcRoFiles() 227 228 hc := h.MakeContext() 229 defer hc.Close() 230 231 r := h.findMergeRange(4, 32) 232 assert.True(t, r.index) 233 assert.True(t, r.history) 234 assert.Equal(t, 4, int(r.indexEndTxNum)) 235 idxFiles, histFiles, _, err := hc.staticFilesInRange(r) 236 require.NoError(t, err) 237 require.Equal(t, 3, len(idxFiles)) 238 require.Equal(t, 3, len(histFiles)) 239 }) 240 t.Run("idx merge progress ahead of history", func(t *testing.T) { 241 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 242 ii.scanStateFiles([]string{ 243 "test.0-1.ef", 244 "test.1-2.ef", 245 "test.0-2.ef", 246 "test.2-3.ef", 247 }) 248 ii.reCalcRoFiles() 249 250 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 251 h.scanStateFiles([]string{ 252 "test.0-1.v", 253 "test.1-2.v", 254 "test.2-3.v", 255 }) 256 h.reCalcRoFiles() 257 258 hc := h.MakeContext() 259 defer hc.Close() 260 261 r := h.findMergeRange(4, 32) 262 assert.False(t, r.index) 263 assert.True(t, r.history) 264 assert.Equal(t, 2, int(r.historyEndTxNum)) 265 idxFiles, histFiles, _, err := hc.staticFilesInRange(r) 266 require.NoError(t, err) 267 require.Equal(t, 2, len(idxFiles)) 268 require.Equal(t, 2, len(histFiles)) 269 }) 270 t.Run("idx merged, but garbage left", func(t *testing.T) { 271 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 272 ii.scanStateFiles([]string{ 273 "test.0-1.ef", 274 "test.1-2.ef", 275 "test.0-2.ef", 276 }) 277 ii.reCalcRoFiles() 278 279 h := &History{InvertedIndex: ii, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 280 h.scanStateFiles([]string{ 281 "test.0-1.v", 282 "test.1-2.v", 283 "test.0-2.v", 284 "test.2-3.v", 285 }) 286 h.reCalcRoFiles() 287 288 hc := h.MakeContext() 289 defer hc.Close() 290 r := h.findMergeRange(4, 32) 291 assert.False(t, r.index) 292 assert.False(t, r.history) 293 }) 294 t.Run("idx merged, but garbage left2", func(t *testing.T) { 295 ii := &InvertedIndex{filenameBase: "test", aggregationStep: 1, files: btree2.NewBTreeG[*filesItem](filesItemLess)} 296 ii.scanStateFiles([]string{ 297 "test.0-1.ef", 298 "test.1-2.ef", 299 "test.0-2.ef", 300 "test.2-3.ef", 301 "test.3-4.ef", 302 }) 303 ii.reCalcRoFiles() 304 ic := ii.MakeContext() 305 defer ic.Close() 306 needMerge, from, to := ii.findMergeRange(4, 32) 307 assert.True(t, needMerge) 308 require.Equal(t, 0, int(from)) 309 require.Equal(t, 4, int(to)) 310 idxFiles, _ := ic.staticFilesInRange(from, to) 311 require.Equal(t, 3, len(idxFiles)) 312 }) 313 } 314 func Test_mergeEliasFano(t *testing.T) { 315 t.Skip() 316 317 firstList := []int{1, 298164, 298163, 13, 298160, 298159} 318 sort.Ints(firstList) 319 uniq := make(map[int]struct{}) 320 321 first := eliasfano32.NewEliasFano(uint64(len(firstList)), uint64(firstList[len(firstList)-1])) 322 for _, v := range firstList { 323 uniq[v] = struct{}{} 324 first.AddOffset(uint64(v)) 325 } 326 first.Build() 327 firstBytes := first.AppendBytes(nil) 328 329 fit := first.Iterator() 330 for fit.HasNext() { 331 v, _ := fit.Next() 332 require.Contains(t, firstList, int(v)) 333 } 334 335 secondList := []int{ 336 1, 644951, 644995, 682653, 13, 337 644988, 644987, 644946, 644994, 338 644942, 644945, 644941, 644940, 339 644939, 644938, 644792, 644787} 340 sort.Ints(secondList) 341 second := eliasfano32.NewEliasFano(uint64(len(secondList)), uint64(secondList[len(secondList)-1])) 342 343 for _, v := range secondList { 344 second.AddOffset(uint64(v)) 345 uniq[v] = struct{}{} 346 } 347 second.Build() 348 secondBytes := second.AppendBytes(nil) 349 350 sit := second.Iterator() 351 for sit.HasNext() { 352 v, _ := sit.Next() 353 require.Contains(t, secondList, int(v)) 354 } 355 356 menc, err := mergeEfs(firstBytes, secondBytes, nil) 357 require.NoError(t, err) 358 359 merged, _ := eliasfano32.ReadEliasFano(menc) 360 require.NoError(t, err) 361 require.EqualValues(t, len(uniq), merged.Count()) 362 require.EqualValues(t, merged.Count(), eliasfano32.Count(menc)) 363 mergedLists := append(firstList, secondList...) 364 sort.Ints(mergedLists) 365 require.EqualValues(t, mergedLists[len(mergedLists)-1], merged.Max()) 366 require.EqualValues(t, merged.Max(), eliasfano32.Max(menc)) 367 368 mit := merged.Iterator() 369 for mit.HasNext() { 370 v, _ := mit.Next() 371 require.Contains(t, mergedLists, int(v)) 372 } 373 }