github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/core/rawdb/freezer_table.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package rawdb

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/golang/snappy"
)

var (
	// errClosed is returned if an operation attempts to read from or write to the
	// freezer table after it has already been closed.
	errClosed = errors.New("closed")

	// errOutOfBounds is returned if the item requested is not contained within the
	// freezer table.
	errOutOfBounds = errors.New("out of bounds")

	// errNotSupported is returned if the database doesn't support the required operation.
	errNotSupported = errors.New("this operation is not supported")
)

// indexEntry contains the number/id of the file that the data resides in, as well as the
// offset within the file to the end of the data.
// In serialized form, the filenum is stored as uint16.
type indexEntry struct {
	filenum uint32 // stored as uint16 ( 2 bytes )
	offset  uint32 // stored as uint32 ( 4 bytes )
}

const indexEntrySize = 6

// unmarshalBinary deserializes binary b into the rawIndex entry.
func (i *indexEntry) unmarshalBinary(b []byte) {
	i.filenum = uint32(binary.BigEndian.Uint16(b[:2]))
	i.offset = binary.BigEndian.Uint32(b[2:6])
}

// append adds the encoded entry to the end of b.
func (i *indexEntry) append(b []byte) []byte {
	offset := len(b)
	out := append(b, make([]byte, indexEntrySize)...)
	binary.BigEndian.PutUint16(out[offset:], uint16(i.filenum))
	binary.BigEndian.PutUint32(out[offset+2:], i.offset)
	return out
}

// bounds returns the start- and end-offsets, and the file number of where to
// read the data item marked by the two index entries. The two entries are
// assumed to be sequential.
func (i *indexEntry) bounds(end *indexEntry) (startOffset, endOffset, fileId uint32) {
	if i.filenum != end.filenum {
		// If a piece of data 'crosses' a data-file,
		// it's actually in one piece on the second data-file.
		// We return a zero-indexEntry for the second file as start
		return 0, end.offset, end.filenum
	}
	return i.offset, end.offset, end.filenum
}
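// Illustrative sketch (not part of the upstream source): each serialized index
// entry occupies indexEntrySize (6) bytes, a big-endian uint16 file number
// followed by a big-endian uint32 end-offset. In table-local numbering (after
// subtracting itemOffset), item n is bracketed by entries n and n+1:
//
//	var a, b indexEntry
//	a.unmarshalBinary(idx[n*indexEntrySize:])     // ends item n-1, starts item n
//	b.unmarshalBinary(idx[(n+1)*indexEntrySize:]) // ends item n
//	start, end, file := a.bounds(&b)              // item n occupies [start, end) of data file 'file'
//
// idx, a, b and n are hypothetical names used only for this sketch.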
// freezerTable represents a single chained data table within the freezer (e.g. blocks).
// It consists of a data file (snappy encoded arbitrary data blobs) and an indexEntry
// file (uncompressed 6-byte index entries into the data file).
type freezerTable struct {
	items      atomic.Uint64 // Number of items stored in the table (including items removed from tail)
	itemOffset atomic.Uint64 // Number of items removed from the table

	// itemHidden is the number of items marked as deleted. Tail deletion is
	// only supported at file level which means the actual deletion will be
	// delayed until the entire data file is marked as deleted. Before that
	// these items will be hidden to prevent being visited again. The value
	// should never be lower than itemOffset.
	itemHidden atomic.Uint64

	noCompression bool   // if true, disables snappy compression. Note: does not work retroactively
	readonly      bool
	maxFileSize   uint32 // Max file size for data-files
	name          string
	path          string

	head   *os.File            // File descriptor for the data head of the table
	index  *os.File            // File descriptor for the indexEntry file of the table
	meta   *os.File            // File descriptor for metadata of the table
	files  map[uint32]*os.File // open files
	headId uint32              // number of the currently active head file
	tailId uint32              // number of the earliest file

	headBytes  int64         // Number of bytes written to the head file
	readMeter  metrics.Meter // Meter for measuring the effective amount of data read
	writeMeter metrics.Meter // Meter for measuring the effective amount of data written
	sizeGauge  metrics.Gauge // Gauge for tracking the combined size of all freezer tables

	logger log.Logger   // Logger with database path and table name embedded
	lock   sync.RWMutex // Mutex protecting the data file descriptors
}

// newFreezerTable opens the given path as a freezer table.
func newFreezerTable(path, name string, disableSnappy, readonly bool) (*freezerTable, error) {
	return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy, readonly)
}
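// Illustrative note (not part of the upstream source): given the naming used in
// newTable and openFile below, a table called "bodies" stored under <path>
// consists of the following files:
//
//	bodies.cidx (or bodies.ridx when compression is disabled)  - index file
//	bodies.meta                                                 - metadata file
//	bodies.0000.cdat, bodies.0001.cdat, ... (or *.rdat)         - numbered data files
//
// "bodies" is only an example table name.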
// newTable opens a freezer table, creating the data and index files if they are
// non-existent. Both files are truncated to the shortest common length to ensure
// they don't go out of sync.
func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) {
	// Ensure the containing directory exists and open the indexEntry file
	if err := os.MkdirAll(path, 0755); err != nil {
		return nil, err
	}
	var idxName string
	if noCompression {
		idxName = fmt.Sprintf("%s.ridx", name) // raw index file
	} else {
		idxName = fmt.Sprintf("%s.cidx", name) // compressed index file
	}
	var (
		err   error
		index *os.File
		meta  *os.File
	)
	if readonly {
		// Will fail if the table index file or meta file does not exist
		index, err = openFreezerFileForReadOnly(filepath.Join(path, idxName))
		if err != nil {
			return nil, err
		}
		meta, err = openFreezerFileForReadOnly(filepath.Join(path, fmt.Sprintf("%s.meta", name)))
		if err != nil {
			return nil, err
		}
	} else {
		index, err = openFreezerFileForAppend(filepath.Join(path, idxName))
		if err != nil {
			return nil, err
		}
		meta, err = openFreezerFileForAppend(filepath.Join(path, fmt.Sprintf("%s.meta", name)))
		if err != nil {
			return nil, err
		}
	}
	// Create the table and repair any past inconsistency
	tab := &freezerTable{
		index:         index,
		meta:          meta,
		files:         make(map[uint32]*os.File),
		readMeter:     readMeter,
		writeMeter:    writeMeter,
		sizeGauge:     sizeGauge,
		name:          name,
		path:          path,
		logger:        log.New("database", path, "table", name),
		noCompression: noCompression,
		readonly:      readonly,
		maxFileSize:   maxFilesize,
	}
	if err := tab.repair(); err != nil {
		tab.Close()
		return nil, err
	}
	// Initialize the starting size counter
	size, err := tab.sizeNolock()
	if err != nil {
		tab.Close()
		return nil, err
	}
	tab.sizeGauge.Inc(int64(size))

	return tab, nil
}

// repair cross-checks the head and the index file and truncates them to
// be in sync with each other after a potential crash / data loss.
func (t *freezerTable) repair() error {
	// Create a temporary offset buffer to init files with and read indexEntry into
	buffer := make([]byte, indexEntrySize)

	// If we've just created the files, initialize the index with the 0 indexEntry
	stat, err := t.index.Stat()
	if err != nil {
		return err
	}
	if stat.Size() == 0 {
		if _, err := t.index.Write(buffer); err != nil {
			return err
		}
	}
	// Ensure the index is a multiple of indexEntrySize bytes
	if overflow := stat.Size() % indexEntrySize; overflow != 0 {
		if t.readonly {
			return fmt.Errorf("index file(path: %s, name: %s) size is not a multiple of %d", t.path, t.name, indexEntrySize)
		}
		if err := truncateFreezerFile(t.index, stat.Size()-overflow); err != nil {
			return err
		} // New file can't trigger this path
	}
	// Retrieve the file sizes and prepare for truncation
	if stat, err = t.index.Stat(); err != nil {
		return err
	}
	offsetsSize := stat.Size()

	// Open the head file
	var (
		firstIndex  indexEntry
		lastIndex   indexEntry
		contentSize int64
		contentExp  int64
		verbose     bool
	)
	// Read index zero, determine what file is the earliest
	// and what item offset to use
	t.index.ReadAt(buffer, 0)
	firstIndex.unmarshalBinary(buffer)
	// Assign the tail fields with the first stored index.
	// The total number of removed items is represented as a uint32,
	// which is not enough in theory but enough in practice.
	// TODO: use uint64 to represent total removed items.
	t.tailId = firstIndex.filenum
	t.itemOffset.Store(uint64(firstIndex.offset))

	// Load metadata from the file
	meta, err := loadMetadata(t.meta, t.itemOffset.Load())
	if err != nil {
		return err
	}
	t.itemHidden.Store(meta.VirtualTail)

	// Read the last index, use the default value in case the freezer is empty
	if offsetsSize == indexEntrySize {
		lastIndex = indexEntry{filenum: t.tailId, offset: 0}
	} else {
		t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
		lastIndex.unmarshalBinary(buffer)
	}
	// Print an error log if the index is corrupted due to an incorrect
	// last index item. While it is theoretically possible to have a zero offset
	// by storing all zero-size items, it is highly unlikely to occur in practice.
	if lastIndex.offset == 0 && offsetsSize/indexEntrySize > 1 {
		log.Error("Corrupted index file detected", "lastOffset", lastIndex.offset, "indexes", offsetsSize/indexEntrySize)
	}
	if t.readonly {
		t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForReadOnly)
	} else {
		t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend)
	}
	if err != nil {
		return err
	}
	if stat, err = t.head.Stat(); err != nil {
		return err
	}
	contentSize = stat.Size()

	// Keep truncating both files until they come in sync
	contentExp = int64(lastIndex.offset)
	for contentExp != contentSize {
		if t.readonly {
			return fmt.Errorf("freezer table(path: %s, name: %s, num: %d) is corrupted", t.path, t.name, lastIndex.filenum)
		}
		verbose = true
		// Truncate the head file to the last offset pointer
		if contentExp < contentSize {
			t.logger.Warn("Truncating dangling head", "indexed", contentExp, "stored", contentSize)
			if err := truncateFreezerFile(t.head, contentExp); err != nil {
				return err
			}
			contentSize = contentExp
		}
		// Truncate the index to point within the head file
		if contentExp > contentSize {
			t.logger.Warn("Truncating dangling indexes", "indexes", offsetsSize/indexEntrySize, "indexed", contentExp, "stored", contentSize)
			if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil {
				return err
			}
			offsetsSize -= indexEntrySize

			// Read the new head index, use the default value in case
			// the freezer is already empty.
			var newLastIndex indexEntry
			if offsetsSize == indexEntrySize {
				newLastIndex = indexEntry{filenum: t.tailId, offset: 0}
			} else {
				t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
				newLastIndex.unmarshalBinary(buffer)
			}
			// We might have slipped back into an earlier head-file here
			if newLastIndex.filenum != lastIndex.filenum {
				// Release earlier opened file
				t.releaseFile(lastIndex.filenum)
				if t.head, err = t.openFile(newLastIndex.filenum, openFreezerFileForAppend); err != nil {
					return err
				}
				if stat, err = t.head.Stat(); err != nil {
					// TODO, anything more we can do here?
					// A data file has gone missing...
					return err
				}
				contentSize = stat.Size()
			}
			lastIndex = newLastIndex
			contentExp = int64(lastIndex.offset)
		}
	}
	// Sync() fails for read-only files on windows.
	if !t.readonly {
		// Ensure all repair changes have been written to disk
		if err := t.index.Sync(); err != nil {
			return err
		}
		if err := t.head.Sync(); err != nil {
			return err
		}
		if err := t.meta.Sync(); err != nil {
			return err
		}
	}
	// Update the item and byte counters and return
	t.items.Store(t.itemOffset.Load() + uint64(offsetsSize/indexEntrySize-1)) // last indexEntry points to the end of the data file
	t.headBytes = contentSize
	t.headId = lastIndex.filenum

	// Delete the leftover files because of head deletion
	t.releaseFilesAfter(t.headId, true)

	// Delete the leftover files because of tail deletion
	t.releaseFilesBefore(t.tailId, true)

	// Close opened files and preopen all files
	if err := t.preopen(); err != nil {
		return err
	}
	if verbose {
		t.logger.Info("Chain freezer table opened", "items", t.items.Load(), "deleted", t.itemOffset.Load(), "hidden", t.itemHidden.Load(), "tailId", t.tailId, "headId", t.headId, "size", t.headBytes)
	} else {
		t.logger.Debug("Chain freezer table opened", "items", t.items.Load(), "size", common.StorageSize(t.headBytes))
	}
	return nil
}

// preopen opens all files that the freezer will need. This method should be called from an init-context,
// since it assumes that it doesn't have to bother with locking.
// The rationale for doing preopen is to not have to do it from within Retrieve, thus not needing to ever
// obtain a write-lock within Retrieve.
func (t *freezerTable) preopen() (err error) {
	// The repair might have already opened (some) files
	t.releaseFilesAfter(0, false)

	// Open all except head in RDONLY
	for i := t.tailId; i < t.headId; i++ {
		if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil {
			return err
		}
	}
	if t.readonly {
		t.head, err = t.openFile(t.headId, openFreezerFileForReadOnly)
	} else {
		// Open head in read/write
		t.head, err = t.openFile(t.headId, openFreezerFileForAppend)
	}
	return err
}
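// Illustrative note (not part of the upstream source): three counters describe
// the live range of a table. items is the total number of items ever written,
// itemOffset is the number physically removed from the tail, and itemHidden is
// the number logically removed (itemOffset <= itemHidden <= items). With, say,
// items=100, itemHidden=40 and itemOffset=30, only items 40..99 are readable;
// truncateHead(90) would drop items 90..99, while truncateTail(50) would hide
// items 40..49, deleting data files only once every item in them is hidden.
// The concrete numbers are purely an example.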
// truncateHead discards any recent data above the provided threshold number.
func (t *freezerTable) truncateHead(items uint64) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Ensure the given truncate target falls in the correct range
	existing := t.items.Load()
	if existing <= items {
		return nil
	}
	if items < t.itemHidden.Load() {
		return errors.New("truncation below tail")
	}
	// We need to truncate, save the old size for metrics tracking
	oldSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	// Something's out of sync, truncate the table's offset index
	log := t.logger.Debug
	if existing > items+1 {
		log = t.logger.Warn // Only loud warn if we delete multiple items
	}
	log("Truncating freezer table", "items", existing, "limit", items)

	// Truncate the index file first, the tail position is also considered
	// when calculating the new freezer table length.
	length := items - t.itemOffset.Load()
	if err := truncateFreezerFile(t.index, int64(length+1)*indexEntrySize); err != nil {
		return err
	}
	if err := t.index.Sync(); err != nil {
		return err
	}
	// Calculate the new expected size of the data file and truncate it
	var expected indexEntry
	if length == 0 {
		expected = indexEntry{filenum: t.tailId, offset: 0}
	} else {
		buffer := make([]byte, indexEntrySize)
		if _, err := t.index.ReadAt(buffer, int64(length*indexEntrySize)); err != nil {
			return err
		}
		expected.unmarshalBinary(buffer)
	}
	// We might need to truncate back to older files
	if expected.filenum != t.headId {
		// If already open for reading, force-reopen for writing
		t.releaseFile(expected.filenum)
		newHead, err := t.openFile(expected.filenum, openFreezerFileForAppend)
		if err != nil {
			return err
		}
		// Release any files _after the current head -- both the previous head
		// and any files which may have been opened for reading
		t.releaseFilesAfter(expected.filenum, true)

		// Set back the historic head
		t.head = newHead
		t.headId = expected.filenum
	}
	if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil {
		return err
	}
	if err := t.head.Sync(); err != nil {
		return err
	}
	// All data files truncated, set internal counters and return
	t.headBytes = int64(expected.offset)
	t.items.Store(items)

	// Retrieve the new size and update the total size counter
	newSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	t.sizeGauge.Dec(int64(oldSize - newSize))
	return nil
}

// sizeHidden returns the total data size of hidden items in the freezer table.
// This function assumes the lock is already held.
func (t *freezerTable) sizeHidden() (uint64, error) {
	hidden, offset := t.itemHidden.Load(), t.itemOffset.Load()
	if hidden <= offset {
		return 0, nil
	}
	indices, err := t.getIndices(hidden-1, 1)
	if err != nil {
		return 0, err
	}
	return uint64(indices[1].offset), nil
}

// truncateTail discards all data below the provided threshold number.
func (t *freezerTable) truncateTail(items uint64) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Ensure the given truncate target falls in the correct range
	if t.itemHidden.Load() >= items {
		return nil
	}
	if t.items.Load() < items {
		return errors.New("truncation above head")
	}
	// Load the new tail index by the given new tail position
	var (
		newTailId uint32
		buffer    = make([]byte, indexEntrySize)
	)
	if t.items.Load() == items {
		newTailId = t.headId
	} else {
		offset := items - t.itemOffset.Load()
		if _, err := t.index.ReadAt(buffer, int64((offset+1)*indexEntrySize)); err != nil {
			return err
		}
		var newTail indexEntry
		newTail.unmarshalBinary(buffer)
		newTailId = newTail.filenum
	}
	// Save the old size for metrics tracking. This needs to be done
	// before any updates to either itemHidden or itemOffset.
	oldSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	// Update the virtual tail marker and hide these entries in the table.
	t.itemHidden.Store(items)
	if err := writeMetadata(t.meta, newMetadata(items)); err != nil {
		return err
	}
	// Hidden items still fall in the current tail file, no data file
	// can be dropped.
	if t.tailId == newTailId {
		return nil
	}
	// Hidden items fall in an invalid range, return an error.
	if t.tailId > newTailId {
		return fmt.Errorf("invalid index, tail-file %d, item-file %d", t.tailId, newTailId)
	}
	// Count how many items can be deleted from the file.
	var (
		newDeleted = items
		deleted    = t.itemOffset.Load()
	)
	// Hidden items exceed the current tail file, drop the relevant data files.
	for current := items - 1; current >= deleted; current -= 1 {
		if _, err := t.index.ReadAt(buffer, int64((current-deleted+1)*indexEntrySize)); err != nil {
			return err
		}
		var pre indexEntry
		pre.unmarshalBinary(buffer)
		if pre.filenum != newTailId {
			break
		}
		newDeleted = current
	}
	// Commit the changes of metadata file first before manipulating
	// the indexes file.
	if err := t.meta.Sync(); err != nil {
		return err
	}
	// Close the index file before shortening it.
	if err := t.index.Close(); err != nil {
		return err
	}
	// Truncate the deleted index entries from the index file.
	err = copyFrom(t.index.Name(), t.index.Name(), indexEntrySize*(newDeleted-deleted+1), func(f *os.File) error {
		tailIndex := indexEntry{
			filenum: newTailId,
			offset:  uint32(newDeleted),
		}
		_, err := f.Write(tailIndex.append(nil))
		return err
	})
	if err != nil {
		return err
	}
	// Reopen the modified index file to load the changes
	t.index, err = openFreezerFileForAppend(t.index.Name())
	if err != nil {
		return err
	}
	// Sync the file to ensure changes are flushed to disk
	if err := t.index.Sync(); err != nil {
		return err
	}
	// Release any files before the current tail
	t.tailId = newTailId
	t.itemOffset.Store(newDeleted)
	t.releaseFilesBefore(t.tailId, true)

	// Retrieve the new size and update the total size counter
	newSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	t.sizeGauge.Dec(int64(oldSize - newSize))
	return nil
}
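// Illustrative note (not part of the upstream source): truncateTail above is
// ordered for crash safety. The new virtual tail is committed to the metadata
// file (meta.Sync) before the index file is rewritten, and the rewritten index
// starts with a fresh tail entry whose filenum is the new tail file and whose
// offset field records the number of deleted items. Obsolete data files are
// only removed afterwards via releaseFilesBefore.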
// Close closes all opened files.
func (t *freezerTable) Close() error {
	t.lock.Lock()
	defer t.lock.Unlock()

	var errs []error
	doClose := func(f *os.File, sync bool, close bool) {
		if sync && !t.readonly {
			if err := f.Sync(); err != nil {
				errs = append(errs, err)
			}
		}
		if close {
			if err := f.Close(); err != nil {
				errs = append(errs, err)
			}
		}
	}
	// Trying to fsync a file opened in rdonly causes "Access denied"
	// error on Windows.
	doClose(t.index, true, true)
	doClose(t.meta, true, true)

	// The preopened non-head data-files are all opened in readonly.
	// The head is opened in rw-mode, so we sync it here - but since it's also
	// part of t.files, it will be closed in the loop below.
	doClose(t.head, true, false) // sync but do not close

	for _, f := range t.files {
		doClose(f, false, true) // close but do not sync
	}
	t.index = nil
	t.meta = nil
	t.head = nil

	if errs != nil {
		return fmt.Errorf("%v", errs)
	}
	return nil
}

// openFile assumes that the write-lock is held by the caller
func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) {
	var exist bool
	if f, exist = t.files[num]; !exist {
		var name string
		if t.noCompression {
			name = fmt.Sprintf("%s.%04d.rdat", t.name, num)
		} else {
			name = fmt.Sprintf("%s.%04d.cdat", t.name, num)
		}
		f, err = opener(filepath.Join(t.path, name))
		if err != nil {
			return nil, err
		}
		t.files[num] = f
	}
	return f, err
}

// releaseFile closes a file, and removes it from the open file cache.
// Assumes that the caller holds the write lock.
func (t *freezerTable) releaseFile(num uint32) {
	if f, exist := t.files[num]; exist {
		delete(t.files, num)
		f.Close()
	}
}

// releaseFilesAfter closes all open files with a higher number, and optionally also deletes the files
func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) {
	for fnum, f := range t.files {
		if fnum > num {
			delete(t.files, fnum)
			f.Close()
			if remove {
				os.Remove(f.Name())
			}
		}
	}
}

// releaseFilesBefore closes all open files with a lower number, and optionally also deletes the files
func (t *freezerTable) releaseFilesBefore(num uint32, remove bool) {
	for fnum, f := range t.files {
		if fnum < num {
			delete(t.files, fnum)
			f.Close()
			if remove {
				os.Remove(f.Name())
			}
		}
	}
}

// getIndices returns the index entries for the given from-item, covering 'count' items.
// N.B: The actual number of returned indices for N items will always be N+1 (unless an
// error is returned).
// OBS: This method assumes that the caller has already verified (and/or trimmed) the range
// so that the items are within bounds. If this method is used to read out of bounds,
// it will return an error.
func (t *freezerTable) getIndices(from, count uint64) ([]*indexEntry, error) {
	// Apply the table-offset
	from = from - t.itemOffset.Load()

	// For reading N items, we need N+1 indices.
	buffer := make([]byte, (count+1)*indexEntrySize)
	if _, err := t.index.ReadAt(buffer, int64(from*indexEntrySize)); err != nil {
		return nil, err
	}
	var (
		indices []*indexEntry
		offset  int
	)
	for i := from; i <= from+count; i++ {
		index := new(indexEntry)
		index.unmarshalBinary(buffer[offset:])
		offset += indexEntrySize
		indices = append(indices, index)
	}
	if from == 0 {
		// Special case if we're reading the first item in the freezer. We assume that
		// the first item always starts from zero (regarding deletion, we only support
		// deletion by files, so the assumption holds).
		// This means we can use the first item metadata to carry information about
		// the 'global' offset, for the deletion-case.
		indices[0].offset = 0
		indices[0].filenum = indices[1].filenum
	}
	return indices, nil
}
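// Illustrative sketch (not part of the upstream source): getIndices(from, count)
// above returns count+1 entries, where entries k and k+1 bracket item from+k.
// Reading the byte range of a single item could therefore look like:
//
//	indices, err := t.getIndices(item, 1)
//	if err == nil {
//		start, end, file := indices[0].bounds(indices[1])
//		// the item's blob occupies [start, end) of data file 'file'
//	}
//
// Real callers (retrieveItems below) batch many items and defer the actual reads.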
// Retrieve looks up the data offset of an item with the given number and retrieves
// the raw binary blob from the data file.
func (t *freezerTable) Retrieve(item uint64) ([]byte, error) {
	items, err := t.RetrieveItems(item, 1, 0)
	if err != nil {
		return nil, err
	}
	return items[0], nil
}

// RetrieveItems returns multiple items in sequence, starting from the index 'start'.
// It will return at most 'max' items, but will abort earlier to respect the
// 'maxBytes' argument. However, if the 'maxBytes' is smaller than the size of one
// item, it _will_ return one element and possibly overflow the maxBytes.
func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, error) {
	// First we read the 'raw' data, which might be compressed.
	diskData, sizes, err := t.retrieveItems(start, count, maxBytes)
	if err != nil {
		return nil, err
	}
	var (
		output     = make([][]byte, 0, count)
		offset     int // offset for reading
		outputSize int // size of uncompressed data
	)
	// Now slice up the data and decompress.
	for i, diskSize := range sizes {
		item := diskData[offset : offset+diskSize]
		offset += diskSize
		decompressedSize := diskSize
		if !t.noCompression {
			decompressedSize, _ = snappy.DecodedLen(item)
		}
		if i > 0 && maxBytes != 0 && uint64(outputSize+decompressedSize) > maxBytes {
			break
		}
		if !t.noCompression {
			data, err := snappy.Decode(nil, item)
			if err != nil {
				return nil, err
			}
			output = append(output, data)
		} else {
			output = append(output, item)
		}
		outputSize += decompressedSize
	}
	return output, nil
}
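// Illustrative sketch (not part of the upstream source): for a hypothetical
// table t, a batch read respects both the item count and the soft byte limit:
//
//	blobs, err := t.RetrieveItems(1000, 64, 1024*1024)
//	// at most 64 items starting at item 1000, stopping early once the
//	// decompressed size would exceed ~1 MiB; at least one item is returned
//
// Passing maxBytes = 0 disables the byte limit entirely.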
// retrieveItems reads up to 'count' items from the table. It reads at least
// one item, but otherwise avoids reading more than maxBytes bytes. Freezer
// will ignore the size limitation and continuously allocate memory to store
// data if maxBytes is 0. It returns the (potentially compressed) data, and
// the sizes.
func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []int, error) {
	t.lock.RLock()
	defer t.lock.RUnlock()

	// Ensure the table and the item are accessible
	if t.index == nil || t.head == nil || t.meta == nil {
		return nil, nil, errClosed
	}
	var (
		items  = t.items.Load()      // the total number of items (head + 1)
		hidden = t.itemHidden.Load() // the number of hidden items
	)
	// Ensure the start is written, not deleted from the tail, and that the
	// caller actually wants something
	if items <= start || hidden > start || count == 0 {
		return nil, nil, errOutOfBounds
	}
	if start+count > items {
		count = items - start
	}
	var output []byte // Buffer to read data into
	if maxBytes != 0 {
		output = make([]byte, 0, maxBytes)
	} else {
		output = make([]byte, 0, 1024) // initial buffer cap
	}
	// readData is a helper method to read a single data item from disk.
	readData := func(fileId, start uint32, length int) error {
		output = grow(output, length)
		dataFile, exist := t.files[fileId]
		if !exist {
			return fmt.Errorf("missing data file %d", fileId)
		}
		if _, err := dataFile.ReadAt(output[len(output)-length:], int64(start)); err != nil {
			return fmt.Errorf("%w, fileid: %d, start: %d, length: %d", err, fileId, start, length)
		}
		return nil
	}
	// Read all the indexes in one go
	indices, err := t.getIndices(start, count)
	if err != nil {
		return nil, nil, err
	}
	var (
		sizes      []int               // The sizes for each element
		totalSize  = 0                 // The total size of all data read so far
		readStart  = indices[0].offset // Where, in the file, to start reading
		unreadSize = 0                 // The size of the as-yet-unread data
	)

	for i, firstIndex := range indices[:len(indices)-1] {
		secondIndex := indices[i+1]
		// Determine the size of the item.
		offset1, offset2, _ := firstIndex.bounds(secondIndex)
		size := int(offset2 - offset1)
		// Crossing a file boundary?
		if secondIndex.filenum != firstIndex.filenum {
			// If we have unread data in the first file, we need to do that read now.
			if unreadSize > 0 {
				if err := readData(firstIndex.filenum, readStart, unreadSize); err != nil {
					return nil, nil, err
				}
				unreadSize = 0
			}
			readStart = 0
		}
		if i > 0 && uint64(totalSize+size) > maxBytes && maxBytes != 0 {
			// About to break out due to byte limit being exceeded. We don't
			// read this last item, but we need to do the deferred reads now.
			if unreadSize > 0 {
				if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil {
					return nil, nil, err
				}
			}
			break
		}
		// Defer the read for later
		unreadSize += size
		totalSize += size
		sizes = append(sizes, size)
		if i == len(indices)-2 || (uint64(totalSize) > maxBytes && maxBytes != 0) {
			// Last item, need to do the read now
			if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil {
				return nil, nil, err
			}
			break
		}
	}

	// Update metrics.
	t.readMeter.Mark(int64(totalSize))
	return output, sizes, nil
}

// has returns whether the item with the given number is still accessible
// in the freezer table.
func (t *freezerTable) has(number uint64) bool {
	return t.items.Load() > number && t.itemHidden.Load() <= number
}

// size returns the total data size in the freezer table.
func (t *freezerTable) size() (uint64, error) {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.sizeNolock()
}

// sizeNolock returns the total data size in the freezer table. This function
// assumes the lock is already held.
func (t *freezerTable) sizeNolock() (uint64, error) {
	stat, err := t.index.Stat()
	if err != nil {
		return 0, err
	}
	hidden, err := t.sizeHidden()
	if err != nil {
		return 0, err
	}
	total := uint64(t.maxFileSize)*uint64(t.headId-t.tailId) + uint64(t.headBytes) + uint64(stat.Size()) - hidden
	return total, nil
}
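// Illustrative note (not part of the upstream source): sizeNolock above estimates
// the table size as
//
//	maxFileSize*(headId-tailId) + headBytes + len(index file) - sizeHidden()
//
// i.e. every sealed data file is counted at its nominal maximum size, the head
// file at its actual written length, plus the index file, minus the bytes still
// occupied by hidden tail items. For example, with maxFileSize = 2 GB, three
// sealed files and a 500 MB head, the reported size is roughly 6.5 GB plus the
// index; the numbers are hypothetical.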
// advanceHead should be called when the current head file would outgrow the file limits,
// and a new file must be opened. The caller of this method must hold the write-lock
// before calling this method.
func (t *freezerTable) advanceHead() error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// We open the next file in truncated mode -- if this file already
	// exists, we need to start over from scratch on it.
	nextID := t.headId + 1
	newHead, err := t.openFile(nextID, openFreezerFileTruncated)
	if err != nil {
		return err
	}
	// Commit the contents of the old file to stable storage and
	// tear it down. It will be re-opened in read-only mode.
	if err := t.head.Sync(); err != nil {
		return err
	}
	t.releaseFile(t.headId)
	t.openFile(t.headId, openFreezerFileForReadOnly)

	// Swap out the current head.
	t.head = newHead
	t.headBytes = 0
	t.headId = nextID
	return nil
}

// Sync pushes any pending data from memory out to disk. This is an expensive
// operation, so use it with care.
func (t *freezerTable) Sync() error {
	t.lock.Lock()
	defer t.lock.Unlock()
	if t.index == nil || t.head == nil || t.meta == nil {
		return errClosed
	}
	var err error
	trackError := func(e error) {
		if e != nil && err == nil {
			err = e
		}
	}

	trackError(t.index.Sync())
	trackError(t.meta.Sync())
	trackError(t.head.Sync())
	return err
}

func (t *freezerTable) dumpIndexStdout(start, stop int64) {
	t.dumpIndex(os.Stdout, start, stop)
}

func (t *freezerTable) dumpIndexString(start, stop int64) string {
	var out bytes.Buffer
	out.WriteString("\n")
	t.dumpIndex(&out, start, stop)
	return out.String()
}

func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
	meta, err := readMetadata(t.meta)
	if err != nil {
		fmt.Fprintf(w, "Failed to decode freezer table %v\n", err)
		return
	}
	fmt.Fprintf(w, "Version %d count %d, deleted %d, hidden %d\n", meta.Version,
		t.items.Load(), t.itemOffset.Load(), t.itemHidden.Load())

	buf := make([]byte, indexEntrySize)

	fmt.Fprintf(w, "| number | fileno | offset |\n")
	fmt.Fprintf(w, "|--------|--------|--------|\n")

	for i := uint64(start); ; i++ {
		if _, err := t.index.ReadAt(buf, int64((i+1)*indexEntrySize)); err != nil {
			break
		}
		var entry indexEntry
		entry.unmarshalBinary(buf)
		fmt.Fprintf(w, "| %03d | %03d | %03d | \n", i, entry.filenum, entry.offset)
		if stop > 0 && i >= uint64(stop) {
			break
		}
	}
	fmt.Fprintf(w, "|--------------------------|\n")
}