github.com/ledgerwatch/erigon-lib@v1.0.0/state/domain.go

/*
   Copyright 2022 Erigon contributors

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package state

import (
	"bytes"
	"container/heap"
	"context"
	"encoding/binary"
	"fmt"
	"math"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"github.com/RoaringBitmap/roaring/roaring64"
	"github.com/ledgerwatch/erigon-lib/common/background"
	btree2 "github.com/tidwall/btree"
	"golang.org/x/sync/errgroup"

	"github.com/ledgerwatch/log/v3"

	"github.com/ledgerwatch/erigon-lib/common"
	"github.com/ledgerwatch/erigon-lib/common/dir"
	"github.com/ledgerwatch/erigon-lib/compress"
	"github.com/ledgerwatch/erigon-lib/kv"
	"github.com/ledgerwatch/erigon-lib/kv/bitmapdb"
	"github.com/ledgerwatch/erigon-lib/recsplit"
)

// filesItem corresponds to a pair of files (.dat and .idx)
type filesItem struct {
	decompressor *compress.Decompressor
	index        *recsplit.Index
	bindex       *BtIndex
	startTxNum   uint64
	endTxNum     uint64

	// Frozen: file of size StepsInBiggestFile. Completely immutable.
	// Cold: file of size < StepsInBiggestFile. Immutable, but can be closed/removed after merge to a bigger file.
	// Hot: stored in DB. Provides snapshot isolation by copy-on-write.
	frozen   bool         // immutable, don't need atomic
	refcount atomic.Int32 // only for `frozen=false`

	// file can be deleted in 2 cases:
	// 1. when `refcount == 0 && canDelete == true`
	// 2. on app startup when `file.isSubsetOfFrozenFile()`
	// other processes (which also read these files) may have the same logic
	canDelete atomic.Bool
}

func newFilesItem(startTxNum, endTxNum uint64, stepSize uint64) *filesItem {
	startStep := startTxNum / stepSize
	endStep := endTxNum / stepSize
	frozen := endStep-startStep == StepsInBiggestFile
	return &filesItem{startTxNum: startTxNum, endTxNum: endTxNum, frozen: frozen}
}

func (i *filesItem) isSubsetOf(j *filesItem) bool {
	return (j.startTxNum <= i.startTxNum && i.endTxNum <= j.endTxNum) && (j.startTxNum != i.startTxNum || i.endTxNum != j.endTxNum)
}

func filesItemLess(i, j *filesItem) bool {
	if i.endTxNum == j.endTxNum {
		return i.startTxNum > j.startTxNum
	}
	return i.endTxNum < j.endTxNum
}

func (i *filesItem) closeFilesAndRemove() {
	if i.decompressor != nil {
		i.decompressor.Close()
		// paranoid-mode on: don't delete frozen files
		if !i.frozen {
			if err := os.Remove(i.decompressor.FilePath()); err != nil {
				log.Trace("close", "err", err, "file", i.decompressor.FileName())
			}
		}
		i.decompressor = nil
	}
	if i.index != nil {
		i.index.Close()
		// paranoid-mode on: don't delete frozen files
		if !i.frozen {
			if err := os.Remove(i.index.FilePath()); err != nil {
				log.Trace("close", "err", err, "file", i.index.FileName())
			}
		}
		i.index = nil
	}
	if i.bindex != nil {
		i.bindex.Close()
		if err := os.Remove(i.bindex.FilePath()); err != nil {
			log.Trace("close", "err", err, "file", i.bindex.FileName())
		}
		i.bindex = nil
	}
}

type DomainStats struct {
	MergesCount          uint64
	LastCollationTook    time.Duration
	LastPruneTook        time.Duration
	LastPruneHistTook    time.Duration
	LastFileBuildingTook time.Duration
	LastCollationSize    uint64
	LastPruneSize        uint64

	HistoryQueries *atomic.Uint64
	TotalQueries   *atomic.Uint64
	EfSearchTime   time.Duration
	DataSize       uint64
	IndexSize      uint64
	FilesCount     uint64
}

func (ds *DomainStats) Accumulate(other DomainStats) {
	ds.HistoryQueries.Add(other.HistoryQueries.Load())
	ds.TotalQueries.Add(other.TotalQueries.Load())
	ds.EfSearchTime += other.EfSearchTime
	ds.IndexSize += other.IndexSize
	ds.DataSize += other.DataSize
	ds.FilesCount += other.FilesCount
}

// Domain is a part of the state (examples are Accounts, Storage, Code)
// Domain should not have any goroutines or locks
type Domain struct {
	/*
		not large:
			keys: key -> ^step
			vals: key -> ^step+value (DupSort)
		large:
			keys: key -> ^step
			vals: key + ^step -> value
	*/

	*History
	files *btree2.BTreeG[*filesItem] // thread-safe, but maybe need 1 RWLock for all trees in AggregatorV3
	// roFiles is derived from the `files` field, but without garbage (canDelete=true, overlaps, etc...)
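// Illustrative sketch (not part of the original file): how the inverted-step
// encoding used by keysTable/valsTable round-trips. Steps are stored as
// ^(txNum/aggregationStep) in big-endian form, so a DupSort cursor sees the
// most recent step first. The function name is hypothetical.
//
//	func exampleInvertedStep(txNum, aggregationStep uint64) {
//		var buf [8]byte
//		binary.BigEndian.PutUint64(buf[:], ^(txNum / aggregationStep)) // encode
//		step := ^binary.BigEndian.Uint64(buf[:])                       // decode recovers the step
//		fmt.Println(step == txNum/aggregationStep)                     // true
//	}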
	// MakeContext() uses this field in a zero-copy way
	roFiles     atomic.Pointer[[]ctxItem]
	defaultDc   *DomainContext
	keysTable   string // key -> invertedStep , invertedStep = ^(txNum / aggregationStep), needs to be a table with DupSort
	valsTable   string // key + invertedStep -> values
	stats       DomainStats
	mergesCount uint64

	garbageFiles []*filesItem // files that exist on disk, but are ignored on opening folder - because they are garbage
	logger       log.Logger
}

func NewDomain(dir, tmpdir string, aggregationStep uint64,
	filenameBase, keysTable, valsTable, indexKeysTable, historyValsTable, indexTable string,
	compressVals, largeValues bool, logger log.Logger) (*Domain, error) {
	d := &Domain{
		keysTable: keysTable,
		valsTable: valsTable,
		files:     btree2.NewBTreeGOptions[*filesItem](filesItemLess, btree2.Options{Degree: 128, NoLocks: false}),
		stats:     DomainStats{HistoryQueries: &atomic.Uint64{}, TotalQueries: &atomic.Uint64{}},
		logger:    logger,
	}
	d.roFiles.Store(&[]ctxItem{})

	var err error
	if d.History, err = NewHistory(dir, tmpdir, aggregationStep, filenameBase, indexKeysTable, indexTable, historyValsTable, compressVals, []string{"kv"}, largeValues, logger); err != nil {
		return nil, err
	}

	return d, nil
}

// LastStepInDB - returns the latest available step in the DB (at least 1 value exists in such a step)
func (d *Domain) LastStepInDB(tx kv.Tx) (lstInDb uint64) {
	lst, _ := kv.FirstKey(tx, d.valsTable)
	if len(lst) > 0 {
		lstInDb = ^binary.BigEndian.Uint64(lst[len(lst)-8:])
	}
	return lstInDb
}

func (d *Domain) StartWrites() {
	d.defaultDc = d.MakeContext()
	d.History.StartWrites()
}

func (d *Domain) FinishWrites() {
	d.defaultDc.Close()
	d.History.FinishWrites()
}
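// Illustrative sketch (not part of the original file): the typical lifecycle
// of a Domain, using only functions defined in this file. The directories,
// table names, and aggregation step are placeholders.
//
//	func exampleDomainLifecycle(logger log.Logger) error {
//		d, err := NewDomain("snapshots", "tmp", 8192, "accounts",
//			"AccountKeys", "AccountVals", "AccountHistoryKeys", "AccountHistoryVals",
//			"AccountIdx", false /*compressVals*/, false /*largeValues*/, logger)
//		if err != nil {
//			return err
//		}
//		defer d.Close()
//		if err := d.OpenFolder(); err != nil { // scan + open .kv/.kvi/.bt files
//			return err
//		}
//		dc := d.MakeContext() // snapshot-isolated read view
//		defer dc.Close()
//		return nil
//	}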
// OpenList - main method to open a list of files.
// It's ok if some files were opened earlier.
// If some file is already open: noop.
// If some file is already open but not in the provided list: close it and remove it from the `files` field.
func (d *Domain) OpenList(fNames []string) error {
	if err := d.History.OpenList(fNames); err != nil {
		return err
	}
	return d.openList(fNames)
}

func (d *Domain) openList(fNames []string) error {
	d.closeWhatNotInList(fNames)
	d.garbageFiles = d.scanStateFiles(fNames)
	if err := d.openFiles(); err != nil {
		return fmt.Errorf("Domain.openList: %s, %w", d.filenameBase, err)
	}
	return nil
}

func (d *Domain) OpenFolder() error {
	files, err := d.fileNamesOnDisk()
	if err != nil {
		return err
	}
	return d.OpenList(files)
}

func (d *Domain) GetAndResetStats() DomainStats {
	r := d.stats
	r.DataSize, r.IndexSize, r.FilesCount = d.collectFilesStats()

	d.stats = DomainStats{}
	return r
}

func (d *Domain) scanStateFiles(fileNames []string) (garbageFiles []*filesItem) {
	re := regexp.MustCompile("^" + d.filenameBase + ".([0-9]+)-([0-9]+).kv$")
	var err error
Loop:
	for _, name := range fileNames {
		subs := re.FindStringSubmatch(name)
		if len(subs) != 3 {
			if len(subs) != 0 {
				d.logger.Warn("File ignored by domain scan, unexpected number of submatches", "name", name, "submatches", len(subs))
			}
			continue
		}
		var startStep, endStep uint64
		if startStep, err = strconv.ParseUint(subs[1], 10, 64); err != nil {
			d.logger.Warn("File ignored by domain scan, parsing startStep", "error", err, "name", name)
			continue
		}
		if endStep, err = strconv.ParseUint(subs[2], 10, 64); err != nil {
			d.logger.Warn("File ignored by domain scan, parsing endStep", "error", err, "name", name)
			continue
		}
		if startStep > endStep {
			d.logger.Warn("File ignored by domain scan, startStep > endStep", "name", name)
			continue
		}

		startTxNum, endTxNum := startStep*d.aggregationStep, endStep*d.aggregationStep
		var newFile = newFilesItem(startTxNum, endTxNum, d.aggregationStep)

		for _, ext := range d.integrityFileExtensions {
			requiredFile := fmt.Sprintf("%s.%d-%d.%s", d.filenameBase, startStep, endStep, ext)
			if !dir.FileExist(filepath.Join(d.dir, requiredFile)) {
				d.logger.Debug(fmt.Sprintf("[snapshots] skip %s because %s doesn't exist", name, requiredFile))
				garbageFiles = append(garbageFiles, newFile)
				continue Loop
			}
		}

		if _, has := d.files.Get(newFile); has {
			continue
		}

		addNewFile := true
		var subSets []*filesItem
		d.files.Walk(func(items []*filesItem) bool {
			for _, item := range items {
				if item.isSubsetOf(newFile) {
					subSets = append(subSets, item)
					continue
				}

				if newFile.isSubsetOf(item) {
					if item.frozen {
						addNewFile = false
						garbageFiles = append(garbageFiles, newFile)
					}
					continue
				}
			}
			return true
		})
		if addNewFile {
			d.files.Set(newFile)
		}
	}
	return garbageFiles
}
func (d *Domain) openFiles() (err error) {
	var totalKeys uint64

	invalidFileItems := make([]*filesItem, 0)
	d.files.Walk(func(items []*filesItem) bool {
		for _, item := range items {
			if item.decompressor != nil {
				continue
			}
			fromStep, toStep := item.startTxNum/d.aggregationStep, item.endTxNum/d.aggregationStep
			datPath := filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, fromStep, toStep))
			if !dir.FileExist(datPath) {
				invalidFileItems = append(invalidFileItems, item)
				continue
			}
			if item.decompressor, err = compress.NewDecompressor(datPath); err != nil {
				return false
			}

			if item.index != nil {
				continue
			}
			idxPath := filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.kvi", d.filenameBase, fromStep, toStep))
			if dir.FileExist(idxPath) {
				if item.index, err = recsplit.OpenIndex(idxPath); err != nil {
					d.logger.Debug("Domain.openFiles: open index failed", "err", err, "file", idxPath)
					return false
				}
				totalKeys += item.index.KeyCount()
			}
			if item.bindex == nil {
				bidxPath := filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.bt", d.filenameBase, fromStep, toStep))
				if item.bindex, err = OpenBtreeIndexWithDecompressor(bidxPath, 2048, item.decompressor); err != nil {
					d.logger.Debug("Domain.openFiles: open btree index failed", "err", err, "file", bidxPath)
					return false
				}
				//totalKeys += item.bindex.KeyCount()
			}
		}
		return true
	})
	if err != nil {
		return err
	}
	for _, item := range invalidFileItems {
		d.files.Delete(item)
	}

	d.reCalcRoFiles()
	return nil
}

func (d *Domain) closeWhatNotInList(fNames []string) {
	var toDelete []*filesItem
	d.files.Walk(func(items []*filesItem) bool {
	Loop1:
		for _, item := range items {
			for _, protectName := range fNames {
				if item.decompressor != nil && item.decompressor.FileName() == protectName {
					continue Loop1
				}
			}
			toDelete = append(toDelete, item)
		}
		return true
	})
	for _, item := range toDelete {
		if item.decompressor != nil {
			item.decompressor.Close()
			item.decompressor = nil
		}
		if item.index != nil {
			item.index.Close()
			item.index = nil
		}
		if item.bindex != nil {
			item.bindex.Close()
			item.bindex = nil
		}
		d.files.Delete(item)
	}
}

func (d *Domain) reCalcRoFiles() {
	roFiles := ctxFiles(d.files)
	d.roFiles.Store(&roFiles)
}

func (d *Domain) Close() {
	d.History.Close()
	d.closeWhatNotInList([]string{})
	d.reCalcRoFiles()
}

func (dc *DomainContext) get(key []byte, fromTxNum uint64, roTx kv.Tx) ([]byte, bool, error) {
	//var invertedStep [8]byte
	dc.d.stats.TotalQueries.Add(1)

	invertedStep := dc.numBuf
	binary.BigEndian.PutUint64(invertedStep[:], ^(fromTxNum / dc.d.aggregationStep))
	keyCursor, err := roTx.CursorDupSort(dc.d.keysTable)
	if err != nil {
		return nil, false, err
	}
	defer keyCursor.Close()
	foundInvStep, err := keyCursor.SeekBothRange(key, invertedStep[:])
	if err != nil {
		return nil, false, err
	}
	if len(foundInvStep) == 0 {
		dc.d.stats.HistoryQueries.Add(1)
		return dc.readFromFiles(key, fromTxNum)
	}
	//keySuffix := make([]byte, len(key)+8)
	copy(dc.keyBuf[:], key)
	copy(dc.keyBuf[len(key):], foundInvStep)
	v, err := roTx.GetOne(dc.d.valsTable, dc.keyBuf[:len(key)+8])
	if err != nil {
		return nil, false, err
	}
	return v, true, nil
}

func (dc *DomainContext) Get(key1, key2 []byte, roTx kv.Tx) ([]byte, error) {
	//key := make([]byte, len(key1)+len(key2))
	copy(dc.keyBuf[:], key1)
	copy(dc.keyBuf[len(key1):], key2)
	// keys larger than 52 bytes will panic
	v, _, err := dc.get(dc.keyBuf[:len(key1)+len(key2)], dc.d.txNum, roTx)
	return v, err
}

func (d *Domain) update(key, original []byte) error {
	var invertedStep [8]byte
	binary.BigEndian.PutUint64(invertedStep[:], ^(d.txNum / d.aggregationStep))
	if err := d.tx.Put(d.keysTable, key, invertedStep[:]); err != nil {
		return err
	}
	return nil
}
func (d *Domain) Put(key1, key2, val []byte) error {
	key := make([]byte, len(key1)+len(key2))
	copy(key, key1)
	copy(key[len(key1):], key2)
	original, _, err := d.defaultDc.get(key, d.txNum, d.tx)
	if err != nil {
		return err
	}
	if bytes.Equal(original, val) {
		return nil
	}
	// This call to AddPrevValue needs to happen before d.tx.Put() later, because otherwise the content of the `original` slice is invalidated
	if err = d.History.AddPrevValue(key1, key2, original); err != nil {
		return err
	}
	if err = d.update(key, original); err != nil {
		return err
	}
	invertedStep := ^(d.txNum / d.aggregationStep)
	keySuffix := make([]byte, len(key)+8)
	copy(keySuffix, key)
	binary.BigEndian.PutUint64(keySuffix[len(key):], invertedStep)
	if err = d.tx.Put(d.valsTable, keySuffix, val); err != nil {
		return err
	}
	return nil
}

func (d *Domain) Delete(key1, key2 []byte) error {
	key := make([]byte, len(key1)+len(key2))
	copy(key, key1)
	copy(key[len(key1):], key2)
	original, found, err := d.defaultDc.get(key, d.txNum, d.tx)
	if err != nil {
		return err
	}
	if !found {
		return nil
	}
	// This call to AddPrevValue needs to happen before d.tx.Delete() later, because otherwise the content of the `original` slice is invalidated
	if err = d.History.AddPrevValue(key1, key2, original); err != nil {
		return err
	}
	if err = d.update(key, original); err != nil {
		return err
	}
	invertedStep := ^(d.txNum / d.aggregationStep)
	keySuffix := make([]byte, len(key)+8)
	copy(keySuffix, key)
	binary.BigEndian.PutUint64(keySuffix[len(key):], invertedStep)
	if err = d.tx.Delete(d.valsTable, keySuffix); err != nil {
		return err
	}
	return nil
}

type CursorType uint8

const (
	FILE_CURSOR CursorType = iota
	DB_CURSOR
)

// CursorItem is the item in the priority queue used to do merge iteration
// over storage of a given account
type CursorItem struct {
	c        kv.CursorDupSort
	dg       *compress.Getter
	dg2      *compress.Getter
	key      []byte
	val      []byte
	endTxNum uint64
	t        CursorType // Whether this item represents state file or DB record, or tree
	reverse  bool
}

type CursorHeap []*CursorItem

func (ch CursorHeap) Len() int {
	return len(ch)
}

func (ch CursorHeap) Less(i, j int) bool {
	cmp := bytes.Compare(ch[i].key, ch[j].key)
	if cmp == 0 {
		// when keys match, the items with later blocks are preferred
		if ch[i].reverse {
			return ch[i].endTxNum > ch[j].endTxNum
		}
		return ch[i].endTxNum < ch[j].endTxNum
	}
	return cmp < 0
}

func (ch *CursorHeap) Swap(i, j int) {
	(*ch)[i], (*ch)[j] = (*ch)[j], (*ch)[i]
}

func (ch *CursorHeap) Push(x interface{}) {
	*ch = append(*ch, x.(*CursorItem))
}

func (ch *CursorHeap) Pop() interface{} {
	old := *ch
	n := len(old)
	x := old[n-1]
	old[n-1] = nil
	*ch = old[0 : n-1]
	return x
}

// ctxItem is the read-only view of a filesItem used inside a context
type ctxItem struct {
	getter     *compress.Getter
	reader     *recsplit.IndexReader
	startTxNum uint64
	endTxNum   uint64

	i   int
	src *filesItem
}

type ctxLocalityIdx struct {
	reader *recsplit.IndexReader
	bm     *bitmapdb.FixedSizeBitmaps
	file   *ctxItem
}

func ctxItemLess(i, j ctxItem) bool { //nolint
	if i.endTxNum == j.endTxNum {
		return i.startTxNum > j.startTxNum
	}
	return i.endTxNum < j.endTxNum
}
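// Illustrative sketch (not part of the original file): CursorHeap satisfies
// heap.Interface, so merge iteration (as in IteratePrefix below) drives it
// through the standard container/heap package. Keys and txNums are hypothetical.
//
//	func exampleCursorHeap() {
//		var cp CursorHeap
//		heap.Init(&cp)
//		heap.Push(&cp, &CursorItem{t: DB_CURSOR, key: []byte("a"), endTxNum: 16, reverse: true})
//		heap.Push(&cp, &CursorItem{t: FILE_CURSOR, key: []byte("a"), endTxNum: 32, reverse: true})
//		top := cp[0] // smallest key; on equal keys with reverse=true, the later endTxNum wins
//		_ = top      // advance it, then heap.Fix(&cp, 0), or heap.Pop(&cp) when exhausted
//	}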
// DomainContext allows accessing the same domain from multiple goroutines
type DomainContext struct {
	d       *Domain
	files   []ctxItem
	getters []*compress.Getter
	readers []*BtIndex
	hc      *HistoryContext
	keyBuf  [60]byte // 52b key and 8b for inverted step
	numBuf  [8]byte
}

func (dc *DomainContext) statelessGetter(i int) *compress.Getter {
	if dc.getters == nil {
		dc.getters = make([]*compress.Getter, len(dc.files))
	}
	r := dc.getters[i]
	if r == nil {
		r = dc.files[i].src.decompressor.MakeGetter()
		dc.getters[i] = r
	}
	return r
}

func (dc *DomainContext) statelessBtree(i int) *BtIndex {
	if dc.readers == nil {
		dc.readers = make([]*BtIndex, len(dc.files))
	}
	r := dc.readers[i]
	if r == nil {
		r = dc.files[i].src.bindex
		dc.readers[i] = r
	}
	return r
}

func (d *Domain) collectFilesStats() (datsz, idxsz, files uint64) {
	d.History.files.Walk(func(items []*filesItem) bool {
		for _, item := range items {
			if item.index == nil {
				return false
			}
			datsz += uint64(item.decompressor.Size())
			idxsz += uint64(item.index.Size())
			files += 2
		}
		return true
	})

	d.files.Walk(func(items []*filesItem) bool {
		for _, item := range items {
			if item.index == nil {
				return false
			}
			datsz += uint64(item.decompressor.Size())
			idxsz += uint64(item.index.Size())
			idxsz += uint64(item.bindex.Size())
			files += 3
		}
		return true
	})

	fcnt, fsz, isz := d.History.InvertedIndex.collectFilesStat()
	datsz += fsz
	files += fcnt
	idxsz += isz
	return
}

func (d *Domain) MakeContext() *DomainContext {
	dc := &DomainContext{
		d:     d,
		hc:    d.History.MakeContext(),
		files: *d.roFiles.Load(),
	}
	for _, item := range dc.files {
		if !item.src.frozen {
			item.src.refcount.Add(1)
		}
	}

	return dc
}

// Collation is the set of compressors created after aggregation
type Collation struct {
	valuesComp   *compress.Compressor
	historyComp  *compress.Compressor
	indexBitmaps map[string]*roaring64.Bitmap
	valuesPath   string
	historyPath  string
	valuesCount  int
	historyCount int
}

func (c Collation) Close() {
	if c.valuesComp != nil {
		c.valuesComp.Close()
	}
	if c.historyComp != nil {
		c.historyComp.Close()
	}
}

type kvpair struct {
	k, v []byte
}

func (d *Domain) writeCollationPair(valuesComp *compress.Compressor, pairs chan kvpair) (count int, err error) {
	for kv := range pairs {
		if err = valuesComp.AddUncompressedWord(kv.k); err != nil {
			return count, fmt.Errorf("add %s values key [%x]: %w", d.filenameBase, kv.k, err)
		}
		mxCollationSize.Inc()
		count++ // Only counting keys, not values
		if err = valuesComp.AddUncompressedWord(kv.v); err != nil {
			return count, fmt.Errorf("add %s values val [%x]=>[%x]: %w", d.filenameBase, kv.k, kv.v, err)
		}
	}
	return count, nil
}
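// Illustrative sketch (not part of the original file): how MakeContext/Close
// provide snapshot isolation over non-frozen files. Each context pins the file
// set it captured; the last reader of a file marked canDelete removes it.
//
//	func exampleSnapshotIsolation(d *Domain) {
//		dc := d.MakeContext() // refcount++ on every non-frozen file
//		defer dc.Close()      // refcount--; when it reaches 0 and canDelete is set,
//		                      // the closing reader calls closeFilesAndRemove()
//		// reads through dc see a stable set of files even if a merge
//		// replaces them concurrently
//	}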
// nolint
func (d *Domain) aggregate(ctx context.Context, step uint64, txFrom, txTo uint64, tx kv.Tx, ps *background.ProgressSet) (err error) {
	mxRunningCollations.Inc()
	start := time.Now()
	collation, err := d.collateStream(ctx, step, txFrom, txTo, tx)
	mxRunningCollations.Dec()
	mxCollateTook.UpdateDuration(start)

	if err != nil {
		collation.Close()
		//return fmt.Errorf("domain collation %q has failed: %w", d.filenameBase, err)
		return err
	}

	mxCollationSize.Set(uint64(collation.valuesComp.Count()))
	mxCollationSizeHist.Set(uint64(collation.historyComp.Count()))

	mxRunningMerges.Inc()

	start = time.Now()
	sf, err := d.buildFiles(ctx, step, collation, ps)
	collation.Close()
	if err != nil {
		sf.Close()
		mxRunningMerges.Dec()
		return err
	}
	defer sf.Close()

	mxRunningMerges.Dec()

	d.integrateFiles(sf, step*d.aggregationStep, (step+1)*d.aggregationStep)
	d.stats.LastFileBuildingTook = time.Since(start)
	return nil
}

// collateStream gathers domain changes over the specified step, using a read-only transaction,
// and returns compressors, elias fano, and bitmaps
// [txFrom; txTo)
func (d *Domain) collateStream(ctx context.Context, step, txFrom, txTo uint64, roTx kv.Tx) (Collation, error) {
	started := time.Now()
	defer func() {
		d.stats.LastCollationTook = time.Since(started)
	}()

	hCollation, err := d.History.collate(step, txFrom, txTo, roTx)
	if err != nil {
		return Collation{}, err
	}

	var valuesComp *compress.Compressor
	closeComp := true
	defer func() {
		if closeComp {
			if valuesComp != nil {
				valuesComp.Close()
			}
		}
	}()

	valuesPath := filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, step, step+1))
	if valuesComp, err = compress.NewCompressor(context.Background(), "collate values", valuesPath, d.tmpdir, compress.MinPatternScore, 1, log.LvlTrace, d.logger); err != nil {
		return Collation{}, fmt.Errorf("create %s values compressor: %w", d.filenameBase, err)
	}

	keysCursor, err := roTx.CursorDupSort(d.keysTable)
	if err != nil {
		return Collation{}, fmt.Errorf("create %s keys cursor: %w", d.filenameBase, err)
	}
	defer keysCursor.Close()

	var (
		k, v     []byte
		pos      uint64
		valCount int
		pairs    = make(chan kvpair, 1024)
	)

	//totalKeys, err := keysCursor.Count()
	//if err != nil {
	//	return Collation{}, fmt.Errorf("failed to obtain keys count for domain %q", d.filenameBase)
	//}

	eg, _ := errgroup.WithContext(ctx)
	eg.Go(func() error {
		valCount, err = d.writeCollationPair(valuesComp, pairs)
		return err
	})

	var (
		stepBytes = make([]byte, 8)
		keySuffix = make([]byte, 256+8)
	)
	binary.BigEndian.PutUint64(stepBytes, ^step)

	for k, _, err = keysCursor.First(); err == nil && k != nil; k, _, err = keysCursor.NextNoDup() {
		pos++

		if v, err = keysCursor.LastDup(); err != nil {
			return Collation{}, fmt.Errorf("find last %s key for aggregation step k=[%x]: %w", d.filenameBase, k, err)
		}
		if bytes.Equal(v, stepBytes) {
			copy(keySuffix, k)
			copy(keySuffix[len(k):], v)
			ks := len(k) + len(v)

			v, err := roTx.GetOne(d.valsTable, keySuffix[:ks])
			if err != nil {
				return Collation{}, fmt.Errorf("find last %s value for aggregation step k=[%x]: %w", d.filenameBase, k, err)
			}

			select {
			case <-ctx.Done():
				return Collation{}, ctx.Err()
			default:
			}

			pairs <- kvpair{k: k, v: v}
		}
	}
	close(pairs)
	if err != nil {
		return Collation{}, fmt.Errorf("iterate over %s keys cursor: %w", d.filenameBase, err)
	}

	if err := eg.Wait(); err != nil {
		return Collation{}, fmt.Errorf("collate over %s keys cursor: %w", d.filenameBase, err)
	}

	closeComp = false
	return Collation{
		valuesPath:   valuesPath,
		valuesComp:   valuesComp,
		valuesCount:  valCount,
		historyPath:  hCollation.historyPath,
		historyComp:  hCollation.historyComp,
		historyCount: hCollation.historyCount,
		indexBitmaps: hCollation.indexBitmaps,
	}, nil
}
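// Illustrative sketch (not part of the original file): the producer/consumer
// shape collateStream uses - one errgroup goroutine drains a buffered channel
// while the cursor loop feeds it, then close() + Wait() flush and join.
//
//	func exampleProducerConsumer(ctx context.Context) error {
//		pairs := make(chan kvpair, 1024)
//		eg, _ := errgroup.WithContext(ctx)
//		eg.Go(func() error {
//			for p := range pairs { // consumer: runs until channel is closed
//				_ = p
//			}
//			return nil
//		})
//		for i := 0; i < 10; i++ { // producer
//			pairs <- kvpair{k: []byte{byte(i)}, v: nil}
//		}
//		close(pairs)
//		return eg.Wait()
//	}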
// collate gathers domain changes over the specified step, using a read-only transaction,
// and returns compressors, elias fano, and bitmaps
// [txFrom; txTo)
func (d *Domain) collate(ctx context.Context, step, txFrom, txTo uint64, roTx kv.Tx, logEvery *time.Ticker) (Collation, error) {
	started := time.Now()
	defer func() {
		d.stats.LastCollationTook = time.Since(started)
	}()

	hCollation, err := d.History.collate(step, txFrom, txTo, roTx)
	if err != nil {
		return Collation{}, err
	}
	var valuesComp *compress.Compressor
	closeComp := true
	defer func() {
		if closeComp {
			hCollation.Close()
			if valuesComp != nil {
				valuesComp.Close()
			}
		}
	}()
	valuesPath := filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, step, step+1))
	if valuesComp, err = compress.NewCompressor(context.Background(), "collate values", valuesPath, d.tmpdir, compress.MinPatternScore, 1, log.LvlTrace, d.logger); err != nil {
		return Collation{}, fmt.Errorf("create %s values compressor: %w", d.filenameBase, err)
	}
	keysCursor, err := roTx.CursorDupSort(d.keysTable)
	if err != nil {
		return Collation{}, fmt.Errorf("create %s keys cursor: %w", d.filenameBase, err)
	}
	defer keysCursor.Close()

	var (
		k, v        []byte
		pos         uint64
		valuesCount uint
	)

	//TODO: use ProgressSet
	//totalKeys, err := keysCursor.Count()
	//if err != nil {
	//	return Collation{}, fmt.Errorf("failed to obtain keys count for domain %q", d.filenameBase)
	//}
	for k, _, err = keysCursor.First(); err == nil && k != nil; k, _, err = keysCursor.NextNoDup() {
		if err != nil {
			return Collation{}, err
		}
		pos++
		select {
		case <-ctx.Done():
			d.logger.Warn("[snapshots] collate domain cancelled", "name", d.filenameBase, "err", ctx.Err())
			return Collation{}, ctx.Err()
		default:
		}

		if v, err = keysCursor.LastDup(); err != nil {
			return Collation{}, fmt.Errorf("find last %s key for aggregation step k=[%x]: %w", d.filenameBase, k, err)
		}
		s := ^binary.BigEndian.Uint64(v)
		if s == step {
			keySuffix := make([]byte, len(k)+8)
			copy(keySuffix, k)
			copy(keySuffix[len(k):], v)
			v, err := roTx.GetOne(d.valsTable, keySuffix)
			if err != nil {
				return Collation{}, fmt.Errorf("find last %s value for aggregation step k=[%x]: %w", d.filenameBase, k, err)
			}
			if err = valuesComp.AddUncompressedWord(k); err != nil {
				return Collation{}, fmt.Errorf("add %s values key [%x]: %w", d.filenameBase, k, err)
			}
			valuesCount++ // Only counting keys, not values
			if err = valuesComp.AddUncompressedWord(v); err != nil {
				return Collation{}, fmt.Errorf("add %s values val [%x]=>[%x]: %w", d.filenameBase, k, v, err)
			}
		}
	}
	if err != nil {
		return Collation{}, fmt.Errorf("iterate over %s keys cursor: %w", d.filenameBase, err)
	}
	closeComp = false
	return Collation{
		valuesPath:   valuesPath,
		valuesComp:   valuesComp,
		valuesCount:  int(valuesCount),
		historyPath:  hCollation.historyPath,
		historyComp:  hCollation.historyComp,
		historyCount: hCollation.historyCount,
		indexBitmaps: hCollation.indexBitmaps,
	}, nil
}
type StaticFiles struct {
	valuesDecomp    *compress.Decompressor
	valuesIdx       *recsplit.Index
	valuesBt        *BtIndex
	historyDecomp   *compress.Decompressor
	historyIdx      *recsplit.Index
	efHistoryDecomp *compress.Decompressor
	efHistoryIdx    *recsplit.Index
}

func (sf StaticFiles) Close() {
	if sf.valuesDecomp != nil {
		sf.valuesDecomp.Close()
	}
	if sf.valuesIdx != nil {
		sf.valuesIdx.Close()
	}
	if sf.valuesBt != nil {
		sf.valuesBt.Close()
	}
	if sf.historyDecomp != nil {
		sf.historyDecomp.Close()
	}
	if sf.historyIdx != nil {
		sf.historyIdx.Close()
	}
	if sf.efHistoryDecomp != nil {
		sf.efHistoryDecomp.Close()
	}
	if sf.efHistoryIdx != nil {
		sf.efHistoryIdx.Close()
	}
}

// buildFiles performs potentially resource intensive operations of creating
// static files and their indices
func (d *Domain) buildFiles(ctx context.Context, step uint64, collation Collation, ps *background.ProgressSet) (StaticFiles, error) {
	hStaticFiles, err := d.History.buildFiles(ctx, step, HistoryCollation{
		historyPath:  collation.historyPath,
		historyComp:  collation.historyComp,
		historyCount: collation.historyCount,
		indexBitmaps: collation.indexBitmaps,
	}, ps)
	if err != nil {
		return StaticFiles{}, err
	}
	valuesComp := collation.valuesComp
	var valuesDecomp *compress.Decompressor
	var valuesIdx *recsplit.Index
	closeComp := true
	defer func() {
		if closeComp {
			hStaticFiles.Close()
			if valuesComp != nil {
				valuesComp.Close()
			}
			if valuesDecomp != nil {
				valuesDecomp.Close()
			}
			if valuesIdx != nil {
				valuesIdx.Close()
			}
		}
	}()
	if d.noFsync {
		valuesComp.DisableFsync()
	}
	if err = valuesComp.Compress(); err != nil {
		return StaticFiles{}, fmt.Errorf("compress %s values: %w", d.filenameBase, err)
	}
	valuesComp.Close()
	valuesComp = nil
	if valuesDecomp, err = compress.NewDecompressor(collation.valuesPath); err != nil {
		return StaticFiles{}, fmt.Errorf("open %s values decompressor: %w", d.filenameBase, err)
	}

	valuesIdxFileName := fmt.Sprintf("%s.%d-%d.kvi", d.filenameBase, step, step+1)
	valuesIdxPath := filepath.Join(d.dir, valuesIdxFileName)
	{
		p := ps.AddNew(valuesIdxFileName, uint64(valuesDecomp.Count()*2))
		defer ps.Delete(p)
		if valuesIdx, err = buildIndexThenOpen(ctx, valuesDecomp, valuesIdxPath, d.tmpdir, collation.valuesCount, false, p, d.logger, d.noFsync); err != nil {
			return StaticFiles{}, fmt.Errorf("build %s values idx: %w", d.filenameBase, err)
		}
	}

	var bt *BtIndex
	{
		btFileName := strings.TrimSuffix(valuesIdxFileName, "kvi") + "bt"
		btPath := filepath.Join(d.dir, btFileName)
		p := ps.AddNew(btFileName, uint64(valuesDecomp.Count()*2))
		defer ps.Delete(p)
		bt, err = CreateBtreeIndexWithDecompressor(btPath, DefaultBtreeM, valuesDecomp, p, d.tmpdir, d.logger)
		if err != nil {
			return StaticFiles{}, fmt.Errorf("build %s values bt idx: %w", d.filenameBase, err)
		}
	}

	closeComp = false
	return StaticFiles{
		valuesDecomp:    valuesDecomp,
		valuesIdx:       valuesIdx,
		valuesBt:        bt,
		historyDecomp:   hStaticFiles.historyDecomp,
		historyIdx:      hStaticFiles.historyIdx,
		efHistoryDecomp: hStaticFiles.efHistoryDecomp,
		efHistoryIdx:    hStaticFiles.efHistoryIdx,
	}, nil
}
func (d *Domain) missedIdxFiles() (l []*filesItem) {
	d.files.Walk(func(items []*filesItem) bool { // don't run slow logic while iterating on btree
		for _, item := range items {
			fromStep, toStep := item.startTxNum/d.aggregationStep, item.endTxNum/d.aggregationStep
			if !dir.FileExist(filepath.Join(d.dir, fmt.Sprintf("%s.%d-%d.bt", d.filenameBase, fromStep, toStep))) {
				l = append(l, item)
			}
		}
		return true
	})
	return l
}

// BuildMissedIndices - produce .efi/.vi/.kvi from .ef/.v/.kv
func (d *Domain) BuildMissedIndices(ctx context.Context, g *errgroup.Group, ps *background.ProgressSet) (err error) {
	d.History.BuildMissedIndices(ctx, g, ps)
	d.InvertedIndex.BuildMissedIndices(ctx, g, ps)
	for _, item := range d.missedIdxFiles() {
		//TODO: build .kvi
		fitem := item
		g.Go(func() error {
			// FilePath() is already the full path to the .kv file (see closeFilesAndRemove),
			// so derive the .bt path from it directly
			idxPath := strings.TrimSuffix(fitem.decompressor.FilePath(), "kv") + "bt"

			p := ps.AddNew("fixme", uint64(fitem.decompressor.Count()))
			defer ps.Delete(p)
			if err := BuildBtreeIndexWithDecompressor(idxPath, fitem.decompressor, p, d.tmpdir, d.logger); err != nil {
				return fmt.Errorf("failed to build btree index for %s: %w", fitem.decompressor.FileName(), err)
			}
			return nil
		})
	}
	return nil
}

func buildIndexThenOpen(ctx context.Context, d *compress.Decompressor, idxPath, tmpdir string, count int, values bool, p *background.Progress, logger log.Logger, noFsync bool) (*recsplit.Index, error) {
	if err := buildIndex(ctx, d, idxPath, tmpdir, count, values, p, logger, noFsync); err != nil {
		return nil, err
	}
	return recsplit.OpenIndex(idxPath)
}
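// Illustrative sketch (not part of the original file): driving
// BuildMissedIndices with an errgroup. background.NewProgressSet() is assumed
// to be the ProgressSet constructor from erigon-lib/common/background.
//
//	func exampleBuildMissedIndices(ctx context.Context, d *Domain) error {
//		g, gCtx := errgroup.WithContext(ctx)
//		ps := background.NewProgressSet()
//		if err := d.BuildMissedIndices(gCtx, g, ps); err != nil {
//			return err
//		}
//		return g.Wait() // index builds run concurrently; join here
//	}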
func buildIndex(ctx context.Context, d *compress.Decompressor, idxPath, tmpdir string, count int, values bool, p *background.Progress, logger log.Logger, noFsync bool) error {
	var rs *recsplit.RecSplit
	var err error
	if rs, err = recsplit.NewRecSplit(recsplit.RecSplitArgs{
		KeyCount:   count,
		Enums:      false,
		BucketSize: 2000,
		LeafSize:   8,
		TmpDir:     tmpdir,
		IndexFile:  idxPath,
	}, logger); err != nil {
		return fmt.Errorf("create recsplit: %w", err)
	}
	defer rs.Close()
	rs.LogLvl(log.LvlTrace)
	if noFsync {
		rs.DisableFsync()
	}
	defer d.EnableMadvNormal().DisableReadAhead()

	word := make([]byte, 0, 256)
	var keyPos, valPos uint64
	g := d.MakeGetter()
	for {
		if err := ctx.Err(); err != nil {
			logger.Warn("recsplit index building cancelled", "err", err)
			return err
		}
		g.Reset(0)
		for g.HasNext() {
			word, valPos = g.Next(word[:0])
			if values {
				if err = rs.AddKey(word, valPos); err != nil {
					return fmt.Errorf("add idx key [%x]: %w", word, err)
				}
			} else {
				if err = rs.AddKey(word, keyPos); err != nil {
					return fmt.Errorf("add idx key [%x]: %w", word, err)
				}
			}
			// Skip value
			keyPos, _ = g.Skip()

			p.Processed.Add(1)
		}
		if err = rs.Build(ctx); err != nil {
			if rs.Collision() {
				logger.Info("Building recsplit. Collision happened. It's ok. Restarting...")
				rs.ResetNextSalt()
			} else {
				return fmt.Errorf("build idx: %w", err)
			}
		} else {
			break
		}
	}
	return nil
}

func (d *Domain) integrateFiles(sf StaticFiles, txNumFrom, txNumTo uint64) {
	d.History.integrateFiles(HistoryFiles{
		historyDecomp:   sf.historyDecomp,
		historyIdx:      sf.historyIdx,
		efHistoryDecomp: sf.efHistoryDecomp,
		efHistoryIdx:    sf.efHistoryIdx,
	}, txNumFrom, txNumTo)

	fi := newFilesItem(txNumFrom, txNumTo, d.aggregationStep)
	fi.decompressor = sf.valuesDecomp
	fi.index = sf.valuesIdx
	fi.bindex = sf.valuesBt
	d.files.Set(fi)

	d.reCalcRoFiles()
}
// [txFrom; txTo)
func (d *Domain) prune(ctx context.Context, step uint64, txFrom, txTo, limit uint64, logEvery *time.Ticker) error {
	defer func(t time.Time) { d.stats.LastPruneTook = time.Since(t) }(time.Now())
	mxPruningProgress.Inc()
	defer mxPruningProgress.Dec()

	var (
		_state    = "scan steps"
		pos       atomic.Uint64
		totalKeys uint64
	)

	keysCursor, err := d.tx.RwCursorDupSort(d.keysTable)
	if err != nil {
		return fmt.Errorf("%s keys cursor: %w", d.filenameBase, err)
	}
	defer keysCursor.Close()

	totalKeys, err = keysCursor.Count()
	if err != nil {
		return fmt.Errorf("get count of %s keys: %w", d.filenameBase, err)
	}

	var (
		k, v, stepBytes []byte
		keyMaxSteps     = make(map[string]uint64)
		c               = 0
	)
	stepBytes = make([]byte, 8)
	binary.BigEndian.PutUint64(stepBytes, ^step)

	for k, v, err = keysCursor.First(); err == nil && k != nil; k, v, err = keysCursor.Next() {
		if bytes.Equal(v, stepBytes) {
			c++
			kl, vl, err := keysCursor.PrevDup()
			if err != nil {
				break
			}
			if kl == nil && vl == nil {
				continue
			}
			s := ^binary.BigEndian.Uint64(vl)
			if s > step {
				_, vn, err := keysCursor.NextDup()
				if err != nil {
					break
				}
				if bytes.Equal(vn, stepBytes) {
					if err := keysCursor.DeleteCurrent(); err != nil {
						return fmt.Errorf("prune key %x: %w", k, err)
					}
					mxPruneSize.Inc()
					keyMaxSteps[string(k)] = s
				}
			}
		}
		pos.Add(1)

		if ctx.Err() != nil {
			d.logger.Warn("[snapshots] prune domain cancelled", "name", d.filenameBase, "err", ctx.Err())
			return ctx.Err()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-logEvery.C:
			d.logger.Info("[snapshots] prune domain", "name", d.filenameBase,
				"stage", _state,
				"range", fmt.Sprintf("%.2f-%.2f", float64(txFrom)/float64(d.aggregationStep), float64(txTo)/float64(d.aggregationStep)),
				"progress", fmt.Sprintf("%.2f%%", (float64(pos.Load())/float64(totalKeys))*100))
		default:
		}
	}
	if err != nil {
		return fmt.Errorf("iterate over %s keys: %w", d.filenameBase, err)
	}

	pos.Store(0)
	// It is important to clean up tables in a specific order:
	// first keysTable, because it is the first one accessed in the `get` function,
	// i.e. if the record is deleted from there, other tables will not be accessed
	var valsCursor kv.RwCursor
	if valsCursor, err = d.tx.RwCursor(d.valsTable); err != nil {
		return fmt.Errorf("%s vals cursor: %w", d.filenameBase, err)
	}
	defer valsCursor.Close()

	for k, _, err = valsCursor.First(); err == nil && k != nil; k, _, err = valsCursor.Next() {
		if bytes.HasSuffix(k, stepBytes) {
			if _, ok := keyMaxSteps[string(k)]; !ok {
				continue
			}
			if err := valsCursor.DeleteCurrent(); err != nil {
				return fmt.Errorf("prune val %x: %w", k, err)
			}
			mxPruneSize.Inc()
		}
		pos.Add(1)
		//_prog = 100 * (float64(pos) / float64(totalKeys))

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-logEvery.C:
			d.logger.Info("[snapshots] prune domain", "name", d.filenameBase, "step", step)
			//"steps", fmt.Sprintf("%.2f-%.2f", float64(txFrom)/float64(d.aggregationStep), float64(txTo)/float64(d.aggregationStep)))
		default:
		}
	}
	if err != nil {
		return fmt.Errorf("iterate over %s vals: %w", d.filenameBase, err)
	}

	defer func(t time.Time) { d.stats.LastPruneHistTook = time.Since(t) }(time.Now())

	if err = d.History.prune(ctx, txFrom, txTo, limit, logEvery); err != nil {
		return fmt.Errorf("prune history at step %d [%d, %d): %w", step, txFrom, txTo, err)
	}
	return nil
}

func (d *Domain) isEmpty(tx kv.Tx) (bool, error) {
	k, err := kv.FirstKey(tx, d.keysTable)
	if err != nil {
		return false, err
	}
	k2, err := kv.FirstKey(tx, d.valsTable)
	if err != nil {
		return false, err
	}
	isEmptyHist, err := d.History.isEmpty(tx)
	if err != nil {
		return false, err
	}
	return k == nil && k2 == nil && isEmptyHist, nil
}
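// Illustrative sketch (not part of the original file): invoking prune for one
// aggregation step. The ticker only throttles progress logging; the limit of
// math.MaxUint64 means "no limit".
//
//	func examplePrune(ctx context.Context, d *Domain, step uint64) error {
//		logEvery := time.NewTicker(30 * time.Second)
//		defer logEvery.Stop()
//		txFrom, txTo := step*d.aggregationStep, (step+1)*d.aggregationStep
//		return d.prune(ctx, step, txFrom, txTo, math.MaxUint64, logEvery)
//	}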
// nolint
func (d *Domain) warmup(ctx context.Context, txFrom, limit uint64, tx kv.Tx) error {
	domainKeysCursor, err := tx.CursorDupSort(d.keysTable)
	if err != nil {
		return fmt.Errorf("create %s domain cursor: %w", d.filenameBase, err)
	}
	defer domainKeysCursor.Close()
	var txKey [8]byte
	binary.BigEndian.PutUint64(txKey[:], txFrom)
	idxC, err := tx.CursorDupSort(d.keysTable)
	if err != nil {
		return err
	}
	defer idxC.Close()
	valsC, err := tx.Cursor(d.valsTable)
	if err != nil {
		return err
	}
	defer valsC.Close()
	k, v, err := domainKeysCursor.Seek(txKey[:])
	if err != nil {
		return err
	}
	if k == nil {
		return nil
	}
	txFrom = binary.BigEndian.Uint64(k)
	txTo := txFrom + d.aggregationStep
	if limit != math.MaxUint64 && limit != 0 {
		txTo = txFrom + limit
	}
	for ; err == nil && k != nil; k, v, err = domainKeysCursor.Next() {
		txNum := binary.BigEndian.Uint64(k)
		if txNum >= txTo {
			break
		}
		_, _, _ = valsC.Seek(v[len(v)-8:])
		_, _ = idxC.SeekBothRange(v[:len(v)-8], k)

		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
	}
	if err != nil {
		return fmt.Errorf("iterate over %s domain keys: %w", d.filenameBase, err)
	}

	return d.History.warmup(ctx, txFrom, limit, tx)
}

var COMPARE_INDEXES = false // if true, will compare values from Btree and InvertedIndex

func (dc *DomainContext) readFromFiles(filekey []byte, fromTxNum uint64) ([]byte, bool, error) {
	var val []byte
	var found bool

	for i := len(dc.files) - 1; i >= 0; i-- {
		if dc.files[i].endTxNum < fromTxNum {
			break
		}
		reader := dc.statelessBtree(i)
		if reader.Empty() {
			continue
		}
		cur, err := reader.Seek(filekey)
		if err != nil {
			//return nil, false, nil //TODO: uncomment me
			return nil, false, err
		}
		if cur == nil {
			continue
		}

		if bytes.Equal(cur.Key(), filekey) {
			val = cur.Value()
			found = true
			break
		}
	}
	return val, found, nil
}

// historyBeforeTxNum searches history for a value of the specified key before txNum
// second return value is true if the value is found in the history (even if it is nil)
func (dc *DomainContext) historyBeforeTxNum(key []byte, txNum uint64, roTx kv.Tx) ([]byte, bool, error) {
	dc.d.stats.HistoryQueries.Add(1)

	v, found, err := dc.hc.GetNoState(key, txNum)
	if err != nil {
		return nil, false, err
	}
	if found {
		return v, true, nil
	}

	var anyItem bool
	var topState ctxItem
	for _, item := range dc.hc.ic.files {
		if item.endTxNum < txNum {
			continue
		}
		anyItem = true
		topState = item
		break
	}
	if anyItem {
		// If there were no changes but there were history files, the value can be obtained from value files
		var val []byte
		for i := len(dc.files) - 1; i >= 0; i-- {
			if dc.files[i].startTxNum > topState.startTxNum {
				continue
			}
			reader := dc.statelessBtree(i)
			if reader.Empty() {
				continue
			}
			cur, err := reader.Seek(key)
			if err != nil {
				dc.d.logger.Warn("failed to read history before from file", "key", key, "err", err)
				return nil, false, err
			}
			if cur == nil {
				continue
			}
			if bytes.Equal(cur.Key(), key) {
				val = cur.Value()
				break
			}
		}
		return val, true, nil
	}
	// Value not found in history files, look in the recent history
	if roTx == nil {
		return nil, false, fmt.Errorf("roTx is nil")
	}
	return dc.hc.getNoStateFromDB(key, txNum, roTx)
}

// GetBeforeTxNum does not always require usage of roTx. If it is possible to determine
// the historical value based only on static files, roTx will not be used.
func (dc *DomainContext) GetBeforeTxNum(key []byte, txNum uint64, roTx kv.Tx) ([]byte, error) {
	v, hOk, err := dc.historyBeforeTxNum(key, txNum, roTx)
	if err != nil {
		return nil, err
	}
	if hOk {
		// if history returned a marker of key creation
		// domain must return nil
		if len(v) == 0 {
			return nil, nil
		}
		return v, nil
	}
	if v, _, err = dc.get(key, txNum-1, roTx); err != nil {
		return nil, err
	}
	return v, nil
}

func (dc *DomainContext) Close() {
	for _, item := range dc.files {
		if item.src.frozen {
			continue
		}
		refCnt := item.src.refcount.Add(-1)
		//GC: the last reader is responsible for removing useless files: close them and delete
		if refCnt == 0 && item.src.canDelete.Load() {
			item.src.closeFilesAndRemove()
		}
	}
	dc.hc.Close()
}
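// Illustrative sketch (not part of the original file): reading a historical
// value as of a given txNum. Per the doc comment above, roTx may go unused if
// static files alone can answer; a nil result means the key did not exist yet.
//
//	func exampleGetBeforeTxNum(dc *DomainContext, roTx kv.Tx, key []byte, txNum uint64) ([]byte, error) {
//		v, err := dc.GetBeforeTxNum(key, txNum, roTx) // value as it was before txNum
//		if err != nil {
//			return nil, err
//		}
//		return v, nil // nil if history recorded the key's creation at/after txNum
//	}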
// IteratePrefix iterates over key-value pairs of the domain that start with the given prefix.
// Such iteration is not intended to be used in the public API, therefore it uses the read-write transaction
// inside the domain. Another version of this for public API use needs to be created, one that uses
// roTx instead and supports ending the iteration before it reaches the end.
func (dc *DomainContext) IteratePrefix(prefix []byte, it func(k, v []byte)) error {
	dc.d.stats.HistoryQueries.Add(1)

	var cp CursorHeap
	heap.Init(&cp)
	var k, v []byte
	var err error
	keysCursor, err := dc.d.tx.CursorDupSort(dc.d.keysTable)
	if err != nil {
		return err
	}
	defer keysCursor.Close()
	if k, v, err = keysCursor.Seek(prefix); err != nil {
		return err
	}
	if bytes.HasPrefix(k, prefix) {
		keySuffix := make([]byte, len(k)+8)
		copy(keySuffix, k)
		copy(keySuffix[len(k):], v)
		step := ^binary.BigEndian.Uint64(v)
		txNum := step * dc.d.aggregationStep
		if v, err = dc.d.tx.GetOne(dc.d.valsTable, keySuffix); err != nil {
			return err
		}
		heap.Push(&cp, &CursorItem{t: DB_CURSOR, key: common.Copy(k), val: common.Copy(v), c: keysCursor, endTxNum: txNum, reverse: true})
	}

	for i, item := range dc.files {
		bg := dc.statelessBtree(i)
		if bg.Empty() {
			continue
		}

		cursor, err := bg.Seek(prefix)
		if err != nil {
			continue
		}

		g := dc.statelessGetter(i)
		key := cursor.Key()
		if bytes.HasPrefix(key, prefix) {
			val := cursor.Value()
			heap.Push(&cp, &CursorItem{t: FILE_CURSOR, key: key, val: val, dg: g, endTxNum: item.endTxNum, reverse: true})
		}
	}
	for cp.Len() > 0 {
		lastKey := common.Copy(cp[0].key)
		lastVal := common.Copy(cp[0].val)
		// Advance all the items that have this key (including the top)
		for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) {
			ci1 := cp[0]
			switch ci1.t {
			case FILE_CURSOR:
				if ci1.dg.HasNext() {
					ci1.key, _ = ci1.dg.Next(ci1.key[:0])
					if bytes.HasPrefix(ci1.key, prefix) {
						ci1.val, _ = ci1.dg.Next(ci1.val[:0])
						heap.Fix(&cp, 0)
					} else {
						heap.Pop(&cp)
					}
				} else {
					heap.Pop(&cp)
				}
			case DB_CURSOR:
				k, v, err = ci1.c.NextNoDup()
				if err != nil {
					return err
				}
				if k != nil && bytes.HasPrefix(k, prefix) {
					ci1.key = common.Copy(k)
					keySuffix := make([]byte, len(k)+8)
					copy(keySuffix, k)
					copy(keySuffix[len(k):], v)
					if v, err = dc.d.tx.GetOne(dc.d.valsTable, keySuffix); err != nil {
						return err
					}
					ci1.val = common.Copy(v)
					heap.Fix(&cp, 0)
				} else {
					heap.Pop(&cp)
				}
			}
		}
		if len(lastVal) > 0 {
			it(lastKey, lastVal)
		}
	}
	return nil
}
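// Illustrative sketch (not part of the original file): collecting all pairs
// under a prefix with IteratePrefix. Each key is delivered once, with the most
// recent value winning across the DB and the static files.
//
//	func exampleIteratePrefix(dc *DomainContext, prefix []byte) (keys, vals [][]byte, err error) {
//		err = dc.IteratePrefix(prefix, func(k, v []byte) {
//			keys = append(keys, common.Copy(k)) // copy: callback buffers are reused
//			vals = append(vals, common.Copy(v))
//		})
//		return keys, vals, err
//	}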