github.com/ledgerwatch/erigon-lib@v1.0.0/state/locality_index.go

/*
   Copyright 2022 Erigon contributors

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package state

import (
	"bytes"
	"container/heap"
	"context"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"sync/atomic"
	"time"

	"github.com/ledgerwatch/erigon-lib/common/assert"
	"github.com/ledgerwatch/erigon-lib/common/dir"
	"github.com/ledgerwatch/erigon-lib/kv/bitmapdb"
	"github.com/ledgerwatch/erigon-lib/recsplit"
	"github.com/ledgerwatch/log/v3"
)

const LocalityIndexUint64Limit = 64 // the bitmap spends 1 bit per file and is stored as a uint64

// LocalityIndex - knows in which .ef files a given key exists
// Format: key -> bitmap(step_number_list)
// step_number_list is the list of .ef files where the given key exists
type LocalityIndex struct {
	filenameBase    string
	dir, tmpdir     string // Directory where static files are created
	aggregationStep uint64 // immutable

	file *filesItem
	bm   *bitmapdb.FixedSizeBitmaps

	roFiles  atomic.Pointer[ctxItem]
	roBmFile atomic.Pointer[bitmapdb.FixedSizeBitmaps]
	logger   log.Logger
}

func NewLocalityIndex(
	dir, tmpdir string,
	aggregationStep uint64,
	filenameBase string,
	logger log.Logger,
) (*LocalityIndex, error) {
	li := &LocalityIndex{
		dir:             dir,
		tmpdir:          tmpdir,
		aggregationStep: aggregationStep,
		filenameBase:    filenameBase,
		logger:          logger,
	}
	return li, nil
}

func (li *LocalityIndex) closeWhatNotInList(fNames []string) {
	if li == nil || li.bm == nil {
		return
	}

	for _, protectName := range fNames {
		if li.bm.FileName() == protectName {
			return
		}
	}
	li.closeFiles()
}

func (li *LocalityIndex) OpenList(fNames []string) error {
	if li == nil {
		return nil
	}
	li.closeWhatNotInList(fNames)
	_ = li.scanStateFiles(fNames)
	if err := li.openFiles(); err != nil {
		return fmt.Errorf("LocalityIndex.openFiles: %s, %w", li.filenameBase, err)
	}
	return nil
}

func (li *LocalityIndex) scanStateFiles(fNames []string) (uselessFiles []*filesItem) {
	if li == nil {
		return nil
	}

	re := regexp.MustCompile("^" + li.filenameBase + ".([0-9]+)-([0-9]+).li$")
	var err error
	for _, name := range fNames {
		subs := re.FindStringSubmatch(name)
		if len(subs) != 3 {
			if len(subs) != 0 {
				li.logger.Warn("File ignored by inverted index scan, more than 3 submatches", "name", name, "submatches", len(subs))
			}
			continue
		}
		var startStep, endStep uint64
		if startStep, err = strconv.ParseUint(subs[1], 10, 64); err != nil {
			li.logger.Warn("File ignored by inverted index scan, parsing startTxNum", "error", err, "name", name)
			continue
		}
		if endStep, err = strconv.ParseUint(subs[2], 10, 64); err != nil {
			li.logger.Warn("File ignored by inverted index scan, parsing endTxNum", "error", err, "name", name)
			continue
		}
		if startStep > endStep {
			li.logger.Warn("File ignored by inverted index scan, startTxNum > endTxNum", "name", name)
			continue
		}

		if startStep != 0 {
			li.logger.Warn("LocalityIndex must always starts from step 0")
			continue
		}
		if endStep > StepsInBiggestFile*LocalityIndexUint64Limit {
			li.logger.Warn("LocalityIndex does store bitmaps as uint64, means it can't handle > 2048 steps. But it's possible to implement")
			continue
		}

		startTxNum, endTxNum := startStep*li.aggregationStep, endStep*li.aggregationStep
		if li.file == nil {
			li.file = newFilesItem(startTxNum, endTxNum, li.aggregationStep)
			li.file.frozen = false // LocalityIndex files are never frozen
		} else if li.file.endTxNum < endTxNum {
			uselessFiles = append(uselessFiles, li.file)
			li.file = newFilesItem(startTxNum, endTxNum, li.aggregationStep)
			li.file.frozen = false // LocalityIndex files are never frozen
		}
	}
	return uselessFiles
}

func (li *LocalityIndex) openFiles() (err error) {
	if li == nil || li.file == nil {
		return nil
	}

	fromStep, toStep := li.file.startTxNum/li.aggregationStep, li.file.endTxNum/li.aggregationStep
	if li.bm == nil {
		dataPath := filepath.Join(li.dir, fmt.Sprintf("%s.%d-%d.l", li.filenameBase, fromStep, toStep))
		if dir.FileExist(dataPath) {
			li.bm, err = bitmapdb.OpenFixedSizeBitmaps(dataPath, int((toStep-fromStep)/StepsInBiggestFile))
			if err != nil {
				return err
			}
		}
	}
	if li.file.index == nil {
		idxPath := filepath.Join(li.dir, fmt.Sprintf("%s.%d-%d.li", li.filenameBase, fromStep, toStep))
		if dir.FileExist(idxPath) {
			li.file.index, err = recsplit.OpenIndex(idxPath)
			if err != nil {
				return fmt.Errorf("LocalityIndex.openFiles: %w, %s", err, idxPath)
			}
		}
	}
	li.reCalcRoFiles()
	return nil
}

func (li *LocalityIndex) closeFiles() {
	if li == nil {
		return
	}
	if li.file != nil && li.file.index != nil {
		li.file.index.Close()
		li.file = nil
	}
	if li.bm != nil {
		li.bm.Close()
		li.bm = nil
	}
}

func (li *LocalityIndex) reCalcRoFiles() {
	if li == nil || li.file == nil {
		return
	}
	li.roFiles.Store(&ctxItem{
		startTxNum: li.file.startTxNum,
		endTxNum:   li.file.endTxNum,
		i:          0,
		src:        li.file,
	})
	li.roBmFile.Store(li.bm)
}

func (li *LocalityIndex) MakeContext() *ctxLocalityIdx {
	if li == nil {
		return nil
	}
	x := &ctxLocalityIdx{
		file: li.roFiles.Load(),
		bm:   li.roBmFile.Load(),
	}
	if x.file != nil && x.file.src != nil {
		x.file.src.refcount.Add(1)
	}
	return x
}

func (out *ctxLocalityIdx) Close(logger log.Logger) {
	if out == nil || out.file == nil || out.file.src == nil {
		return
	}
	refCnt := out.file.src.refcount.Add(-1)
	if refCnt == 0 && out.file.src.canDelete.Load() {
		closeLocalityIndexFilesAndRemove(out, logger)
	}
}

func closeLocalityIndexFilesAndRemove(i *ctxLocalityIdx, logger log.Logger) {
	if i.file.src != nil {
		i.file.src.closeFilesAndRemove()
		i.file.src = nil
	}
	if i.bm != nil {
		i.bm.Close()
		if err := os.Remove(i.bm.FilePath()); err != nil {
			logger.Trace("os.Remove", "err", err, "file", i.bm.FileName())
		}
		i.bm = nil
	}
}

func (li *LocalityIndex) Close() {
	li.closeWhatNotInList([]string{})
	li.reCalcRoFiles()
}

func (li *LocalityIndex) Files() (res []string) { return res }

func (li *LocalityIndex) NewIdxReader() *recsplit.IndexReader {
	if li != nil && li.file != nil && li.file.index != nil {
		return recsplit.NewIndexReader(li.file.index)
	}
	return nil
}

// lookupIdxFiles returns exactly 2 files (steps) for a key,
// which prevents searching for the key in many files.
func (li *LocalityIndex) lookupIdxFiles(loc *ctxLocalityIdx, key []byte, fromTxNum uint64) (exactShard1, exactShard2 uint64, lastIndexedTxNum uint64, ok1, ok2 bool) {
	if li == nil || loc == nil || loc.bm == nil {
		return 0, 0, 0, false, false
	}
	if loc.reader == nil {
		loc.reader = recsplit.NewIndexReader(loc.file.src.index)
	}

	if fromTxNum >= loc.file.endTxNum {
		return 0, 0, fromTxNum, false, false
	}

	fromFileNum := fromTxNum / li.aggregationStep / StepsInBiggestFile
	fn1, fn2, ok1, ok2, err := loc.bm.First2At(loc.reader.Lookup(key), fromFileNum)
	if err != nil {
		panic(err)
	}
	return fn1 * StepsInBiggestFile, fn2 * StepsInBiggestFile, loc.file.endTxNum, ok1, ok2
}

func (li *LocalityIndex) missedIdxFiles(ii *InvertedIndexContext) (toStep uint64, idxExists bool) {
	if len(ii.files) == 0 {
		return 0, true
	}
	var item *ctxItem
	for i := len(ii.files) - 1; i >= 0; i-- {
		if ii.files[i].src.frozen {
			item = &ii.files[i]
			break
		}
	}
	if item != nil {
		toStep = item.endTxNum / li.aggregationStep
	}
	fName := fmt.Sprintf("%s.%d-%d.li", li.filenameBase, 0, toStep)
	return toStep, dir.FileExist(filepath.Join(li.dir, fName))
}

func (li *LocalityIndex) buildFiles(ctx context.Context, ic *InvertedIndexContext, toStep uint64) (files *LocalityIndexFiles, err error) {
	defer ic.ii.EnableMadvNormalReadAhead().DisableReadAhead()

	logEvery := time.NewTicker(30 * time.Second)
	defer logEvery.Stop()

	fromStep := uint64(0)
	count := 0
	it := ic.iterateKeysLocality(toStep * li.aggregationStep)
	for it.HasNext() {
		_, _ = it.Next()
		count++
	}

	fName := fmt.Sprintf("%s.%d-%d.li", li.filenameBase, fromStep, toStep)
	idxPath := filepath.Join(li.dir, fName)
	filePath := filepath.Join(li.dir, fmt.Sprintf("%s.%d-%d.l", li.filenameBase, fromStep, toStep))

	rs, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{
		KeyCount:   count,
		Enums:      false,
		BucketSize: 2000,
		LeafSize:   8,
		TmpDir:     li.tmpdir,
		IndexFile:  idxPath,
	}, li.logger)
	if err != nil {
		return nil, fmt.Errorf("create recsplit: %w", err)
	}
	defer rs.Close()
	rs.LogLvl(log.LvlTrace)

	i := uint64(0)
	for {
		dense, err := bitmapdb.NewFixedSizeBitmapsWriter(filePath, int(it.FilesAmount()), uint64(count), li.logger)
		if err != nil {
			return nil, err
		}
		defer dense.Close()

		it = ic.iterateKeysLocality(toStep * li.aggregationStep)
		for it.HasNext() {
			k, inFiles := it.Next()
			if err := dense.AddArray(i, inFiles); err != nil {
				return nil, err
			}
			if err = rs.AddKey(k, 0); err != nil {
				return nil, err
			}
			i++

			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-logEvery.C:
				li.logger.Info("[LocalityIndex] build", "name", li.filenameBase, "progress", fmt.Sprintf("%.2f%%", 50+it.Progress()/2))
			default:
			}
		}

		if err := dense.Build(); err != nil {
			return nil, err
		}

		if err = rs.Build(ctx); err != nil {
			if rs.Collision() {
				li.logger.Debug("Building recsplit. Collision happened. It's ok. Restarting...")
				rs.ResetNextSalt()
			} else {
				return nil, fmt.Errorf("build idx: %w", err)
			}
		} else {
			break
		}
	}

	idx, err := recsplit.OpenIndex(idxPath)
	if err != nil {
		return nil, err
	}
	bm, err := bitmapdb.OpenFixedSizeBitmaps(filePath, int(it.FilesAmount()))
	if err != nil {
		return nil, err
	}
	return &LocalityIndexFiles{index: idx, bm: bm}, nil
}

func (li *LocalityIndex) integrateFiles(sf LocalityIndexFiles, txNumFrom, txNumTo uint64) {
	if li.file != nil {
		li.file.canDelete.Store(true)
	}
	li.file = &filesItem{
		startTxNum: txNumFrom,
		endTxNum:   txNumTo,
		index:      sf.index,
		frozen:     false,
	}
	li.bm = sf.bm
	li.reCalcRoFiles()
}

func (li *LocalityIndex) BuildMissedIndices(ctx context.Context, ii *InvertedIndexContext) error {
	if li == nil {
		return nil
	}
	toStep, idxExists := li.missedIdxFiles(ii)
	if idxExists || toStep == 0 {
		return nil
	}
	fromStep := uint64(0)
	f, err := li.buildFiles(ctx, ii, toStep)
	if err != nil {
		return err
	}
	li.integrateFiles(*f, fromStep*li.aggregationStep, toStep*li.aggregationStep)
	return nil
}

type LocalityIndexFiles struct {
	index *recsplit.Index
	bm    *bitmapdb.FixedSizeBitmaps
}

func (sf LocalityIndexFiles) Close() {
	if sf.index != nil {
		sf.index.Close()
	}
	if sf.bm != nil {
		sf.bm.Close()
	}
}

type LocalityIterator struct {
	hc               *InvertedIndexContext
	h                ReconHeapOlderFirst
	files, nextFiles []uint64
	key, nextKey     []byte
	progress         uint64
	hasNext          bool

	totalOffsets, filesAmount uint64
}

func (si *LocalityIterator) advance() {
	for si.h.Len() > 0 {
		top := heap.Pop(&si.h).(*ReconItem)
		key := top.key
		_, offset := top.g.NextUncompressed()
		si.progress += offset - top.lastOffset
		top.lastOffset = offset
		inStep := uint32(top.startTxNum / si.hc.ii.aggregationStep)
		if top.g.HasNext() {
			top.key, _ = top.g.NextUncompressed()
			heap.Push(&si.h, top)
		}

		inFile := inStep / StepsInBiggestFile

		if !bytes.Equal(key, si.key) {
			if si.key == nil {
				si.key = key
				si.files = append(si.files, uint64(inFile))
				continue
			}

			si.nextFiles, si.files = si.files, si.nextFiles[:0]
			si.nextKey = si.key

			si.files = append(si.files, uint64(inFile))
			si.key = key
			si.hasNext = true
			return
		}
		si.files = append(si.files, uint64(inFile))
	}
	si.nextFiles, si.files = si.files, si.nextFiles[:0]
	si.nextKey = si.key
	si.hasNext = false
}

func (si *LocalityIterator) HasNext() bool { return si.hasNext }
func (si *LocalityIterator) Progress() float64 {
	return (float64(si.progress) / float64(si.totalOffsets)) * 100
}
func (si *LocalityIterator) FilesAmount() uint64 { return si.filesAmount }

func (si *LocalityIterator) Next() ([]byte, []uint64) {
	si.advance()
	return si.nextKey, si.nextFiles
}

func (ic *InvertedIndexContext) iterateKeysLocality(uptoTxNum uint64) *LocalityIterator {
	si := &LocalityIterator{hc: ic}
	for _, item := range ic.files {
		if !item.src.frozen || item.startTxNum > uptoTxNum {
			continue
		}
		if assert.Enable {
			if (item.endTxNum-item.startTxNum)/ic.ii.aggregationStep != StepsInBiggestFile {
				panic(fmt.Errorf("frozen file of small size: %s", item.src.decompressor.FileName()))
			}
		}
		g := item.src.decompressor.MakeGetter()
		if g.HasNext() {
			key, offset := g.NextUncompressed()

			heapItem := &ReconItem{startTxNum: item.startTxNum, endTxNum: item.endTxNum, g: g, txNum: ^item.endTxNum, key: key, startOffset: offset, lastOffset: offset}
			heap.Push(&si.h, heapItem)
		}
		si.totalOffsets += uint64(g.Size())
		si.filesAmount++
	}
	si.advance()
	return si
}
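
// Example (illustrative sketch, not part of the original file): one plausible
// way to wire up a LocalityIndex for lookups, using only the functions defined
// above. The directory paths, the "accounts" filename base, the aggregation
// step value and the .li file name are placeholder assumptions, not values
// taken from this repository.
func exampleLocalityLookup(logger log.Logger, key []byte) error {
	li, err := NewLocalityIndex("/snapshots/history", "/snapshots/tmp", 3125, "accounts", logger)
	if err != nil {
		return err
	}
	defer li.Close()

	// OpenList scans the provided names for "<base>.<from>-<to>.li" files and
	// opens the matching .li/.l pair if it exists on disk.
	if err := li.OpenList([]string{"accounts.0-64.li"}); err != nil {
		return err
	}

	lc := li.MakeContext()
	defer lc.Close(logger)

	// lookupIdxFiles narrows the search for the key to at most two candidate
	// shards (step ranges) instead of probing every .ef file.
	s1, s2, lastTxNum, ok1, ok2 := li.lookupIdxFiles(lc, key, 0)
	if ok1 {
		logger.Info("candidate shard", "fromStep", s1)
	}
	if ok2 {
		logger.Info("candidate shard", "fromStep", s2)
	}
	logger.Info("locality index coverage", "endTxNum", lastTxNum)
	return nil
}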
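
// Example (illustrative sketch, not part of the original file): draining the
// merged iterator that buildFiles is based on. For every key present in the
// frozen .ef files up to uptoTxNum it yields the list of file numbers the key
// occurs in; those lists are what buildFiles turns into bitmaps in the .l file.
func exampleIterateKeysLocality(ic *InvertedIndexContext, uptoTxNum uint64, logger log.Logger) {
	it := ic.iterateKeysLocality(uptoTxNum)
	for it.HasNext() {
		key, inFiles := it.Next()
		logger.Trace("key locality", "key", fmt.Sprintf("%x", key), "files", inFiles, "progress", fmt.Sprintf("%.2f%%", it.Progress()))
	}
}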
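
// Example (illustrative sketch, not part of the original file): the build
// path. BuildMissedIndices returns early when there are no frozen .ef files
// yet, or when a .li file covering the full frozen step range already exists
// on disk; otherwise it rebuilds the index from step 0 and swaps it in via
// integrateFiles.
func exampleBuildLocalityIndex(ctx context.Context, li *LocalityIndex, ii *InvertedIndexContext) error {
	if err := li.BuildMissedIndices(ctx, ii); err != nil {
		return err
	}
	// After a successful build, readers pick up the new files through a
	// freshly made context.
	lc := li.MakeContext()
	defer lc.Close(li.logger)
	return nil
}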