github.com/tacshi/go-ethereum@v0.0.0-20230616113857-84a434e20921/core/rawdb/freezer_table.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package rawdb

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"

	"github.com/golang/snappy"
	"github.com/tacshi/go-ethereum/common"
	"github.com/tacshi/go-ethereum/log"
	"github.com/tacshi/go-ethereum/metrics"
)

var (
	// errClosed is returned if an operation attempts to read from or write to the
	// freezer table after it has already been closed.
	errClosed = errors.New("closed")

	// errOutOfBounds is returned if the item requested is not contained within the
	// freezer table.
	errOutOfBounds = errors.New("out of bounds")

	// errNotSupported is returned if the database doesn't support the required operation.
	errNotSupported = errors.New("this operation is not supported")
)

// indexEntry contains the number/id of the file that the data resides in, as well
// as the offset within the file to the end of the data.
// In serialized form, the filenum is stored as a uint16.
type indexEntry struct {
	filenum uint32 // stored as uint16 ( 2 bytes )
	offset  uint32 // stored as uint32 ( 4 bytes )
}

const indexEntrySize = 6

// unmarshalBinary deserializes binary b into the index entry.
func (i *indexEntry) unmarshalBinary(b []byte) {
	i.filenum = uint32(binary.BigEndian.Uint16(b[:2]))
	i.offset = binary.BigEndian.Uint32(b[2:6])
}

// append adds the encoded entry to the end of b.
func (i *indexEntry) append(b []byte) []byte {
	offset := len(b)
	out := append(b, make([]byte, indexEntrySize)...)
	binary.BigEndian.PutUint16(out[offset:], uint16(i.filenum))
	binary.BigEndian.PutUint32(out[offset+2:], i.offset)
	return out
}

// bounds returns the start and end offsets, and the file number of where to
// read the data item marked by the two index entries. The two entries are
// assumed to be sequential.
func (i *indexEntry) bounds(end *indexEntry) (startOffset, endOffset, fileId uint32) {
	if i.filenum != end.filenum {
		// If a piece of data 'crosses' a data-file,
		// it's actually in one piece on the second data-file.
		// We return a zero-indexEntry for the second file as start.
		return 0, end.offset, end.filenum
	}
	return i.offset, end.offset, end.filenum
}
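
// For illustration: an entry {filenum: 2, offset: 0x1234} serializes to the six
// bytes [0x00 0x02 0x00 0x00 0x12 0x34] (big-endian uint16 filenum followed by
// big-endian uint32 offset). Given two hypothetical sequential entries
// A={filenum: 1, offset: 500} and B={filenum: 1, offset: 620}, A.bounds(B)
// yields (500, 620, 1), i.e. the item occupies bytes [500, 620) of data file 1;
// if B instead pointed into file 2, the item was written entirely into that new
// file and A.bounds(B) yields (0, B.offset, 2).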

// freezerTable represents a single chained data table within the freezer (e.g. blocks).
// It consists of a data file (snappy encoded arbitrary data blobs) and an indexEntry
// file (uncompressed 6-byte index entries pointing into the data file).
type freezerTable struct {
	// WARNING: The `items` field is accessed atomically. On 32 bit platforms, only
	// 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned,
	// so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG).
	items      uint64 // Number of items stored in the table (including items removed from tail)
	itemOffset uint64 // Number of items removed from the table

	// itemHidden is the number of items marked as deleted. Tail deletion is
	// only supported at the file level, which means the actual deletion will be
	// delayed until the entire data file is marked as deleted. Before that,
	// these items will be hidden to prevent being visited again. The value
	// should never be lower than itemOffset.
	itemHidden uint64

	noCompression bool   // if true, disables snappy compression. Note: does not work retroactively
	readonly      bool
	maxFileSize   uint32 // Max file size for data-files
	name          string
	path          string

	head   *os.File            // File descriptor for the data head of the table
	index  *os.File            // File descriptor for the indexEntry file of the table
	meta   *os.File            // File descriptor for metadata of the table
	files  map[uint32]*os.File // open files
	headId uint32              // number of the currently active head file
	tailId uint32              // number of the earliest file

	headBytes  int64         // Number of bytes written to the head file
	readMeter  metrics.Meter // Meter for measuring the effective amount of data read
	writeMeter metrics.Meter // Meter for measuring the effective amount of data written
	sizeGauge  metrics.Gauge // Gauge for tracking the combined size of all freezer tables

	logger log.Logger   // Logger with database path and table name embedded
	lock   sync.RWMutex // Mutex protecting the data file descriptors
}

// newFreezerTable opens the given path as a freezer table.
func newFreezerTable(path, name string, disableSnappy, readonly bool) (*freezerTable, error) {
	return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy, readonly)
}
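
// A minimal usage sketch (the path and the table name "headers" are hypothetical):
//
//	table, err := newFreezerTable("/tmp/freezer", "headers", false, false)
//	if err != nil {
//		// handle error
//	}
//	defer table.Close()
//	blob, err := table.Retrieve(0) // first stored item, if present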

// newTable opens a freezer table, creating the data and index files if they are
// non-existent. Both files are truncated to the shortest common length to ensure
// they don't go out of sync.
func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) {
	// Ensure the containing directory exists and open the indexEntry file
	if err := os.MkdirAll(path, 0755); err != nil {
		return nil, err
	}
	var idxName string
	if noCompression {
		idxName = fmt.Sprintf("%s.ridx", name) // raw index file
	} else {
		idxName = fmt.Sprintf("%s.cidx", name) // compressed index file
	}
	var (
		err   error
		index *os.File
		meta  *os.File
	)
	if readonly {
		// Will fail if the table index file or meta file doesn't exist
		index, err = openFreezerFileForReadOnly(filepath.Join(path, idxName))
		if err != nil {
			return nil, err
		}
		meta, err = openFreezerFileForReadOnly(filepath.Join(path, fmt.Sprintf("%s.meta", name)))
		if err != nil {
			return nil, err
		}
	} else {
		index, err = openFreezerFileForAppend(filepath.Join(path, idxName))
		if err != nil {
			return nil, err
		}
		meta, err = openFreezerFileForAppend(filepath.Join(path, fmt.Sprintf("%s.meta", name)))
		if err != nil {
			return nil, err
		}
	}
	// Create the table and repair any past inconsistency
	tab := &freezerTable{
		index:         index,
		meta:          meta,
		files:         make(map[uint32]*os.File),
		readMeter:     readMeter,
		writeMeter:    writeMeter,
		sizeGauge:     sizeGauge,
		name:          name,
		path:          path,
		logger:        log.New("database", path, "table", name),
		noCompression: noCompression,
		readonly:      readonly,
		maxFileSize:   maxFilesize,
	}
	if err := tab.repair(); err != nil {
		tab.Close()
		return nil, err
	}
	// Initialize the starting size counter
	size, err := tab.sizeNolock()
	if err != nil {
		tab.Close()
		return nil, err
	}
	tab.sizeGauge.Inc(int64(size))

	return tab, nil
}
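
// On disk, a table named "bodies" (hypothetical) with compression enabled
// consists of bodies.cidx (the index), bodies.meta (tail metadata) and a set
// of numbered data files bodies.0000.cdat, bodies.0001.cdat, and so on. With
// compression disabled the extensions are .ridx and .rdat instead.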

// repair cross-checks the head and the index file and truncates them to
// be in sync with each other after a potential crash / data loss.
func (t *freezerTable) repair() error {
	// Create a temporary offset buffer to init files with and read indexEntry into
	buffer := make([]byte, indexEntrySize)

	// If we've just created the files, initialize the index with the 0 indexEntry
	stat, err := t.index.Stat()
	if err != nil {
		return err
	}
	if stat.Size() == 0 {
		if _, err := t.index.Write(buffer); err != nil {
			return err
		}
	}
	// Ensure the index is a multiple of indexEntrySize bytes
	if overflow := stat.Size() % indexEntrySize; overflow != 0 {
		truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path
	}
	// Retrieve the file sizes and prepare for truncation
	if stat, err = t.index.Stat(); err != nil {
		return err
	}
	offsetsSize := stat.Size()

	// Open the head file
	var (
		firstIndex  indexEntry
		lastIndex   indexEntry
		contentSize int64
		contentExp  int64
		verbose     bool
	)
	// Read index zero, determine what file is the earliest
	// and what item offset to use
	t.index.ReadAt(buffer, 0)
	firstIndex.unmarshalBinary(buffer)

	// Assign the tail fields with the first stored index.
	// The total removed items is represented with a uint32,
	// which is not enough in theory but enough in practice.
	// TODO: use uint64 to represent total removed items.
	t.tailId = firstIndex.filenum
	t.itemOffset = uint64(firstIndex.offset)

	// Load metadata from the file
	meta, err := loadMetadata(t.meta, t.itemOffset)
	if err != nil {
		return err
	}
	t.itemHidden = meta.VirtualTail

	// Read the last index, use the default value in case the freezer is empty
	if offsetsSize == indexEntrySize {
		lastIndex = indexEntry{filenum: t.tailId, offset: 0}
	} else {
		t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
		lastIndex.unmarshalBinary(buffer)
	}
	if t.readonly {
		t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForReadOnly)
	} else {
		t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend)
	}
	if err != nil {
		return err
	}
	if stat, err = t.head.Stat(); err != nil {
		return err
	}
	contentSize = stat.Size()

	// Keep truncating both files until they come in sync
	contentExp = int64(lastIndex.offset)
	for contentExp != contentSize {
		verbose = true
		// Truncate the head file to the last offset pointer
		if contentExp < contentSize {
			t.logger.Warn("Truncating dangling head", "indexed", contentExp, "stored", contentSize)
			if err := truncateFreezerFile(t.head, contentExp); err != nil {
				return err
			}
			contentSize = contentExp
		}
		// Truncate the index to point within the head file
		if contentExp > contentSize {
			t.logger.Warn("Truncating dangling indexes", "indexes", offsetsSize/indexEntrySize, "indexed", contentExp, "stored", contentSize)
			if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil {
				return err
			}
			offsetsSize -= indexEntrySize

			// Read the new head index, use the default value in case
			// the freezer is already empty.
			var newLastIndex indexEntry
			if offsetsSize == indexEntrySize {
				newLastIndex = indexEntry{filenum: t.tailId, offset: 0}
			} else {
				t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
				newLastIndex.unmarshalBinary(buffer)
			}
			// We might have slipped back into an earlier head-file here
			if newLastIndex.filenum != lastIndex.filenum {
				// Release earlier opened file
				t.releaseFile(lastIndex.filenum)
				if t.head, err = t.openFile(newLastIndex.filenum, openFreezerFileForAppend); err != nil {
					return err
				}
				if stat, err = t.head.Stat(); err != nil {
					// TODO, anything more we can do here?
					// A data file has gone missing...
					return err
				}
				contentSize = stat.Size()
			}
			lastIndex = newLastIndex
			contentExp = int64(lastIndex.offset)
		}
	}
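
	// At this point the index and the head data file agree on the amount of stored
	// data: for example, if the last index entry claimed 512 bytes while the head
	// file held only 400 (say, after a crash mid-write), the dangling index entries
	// were dropped above; in the opposite case the surplus data bytes were truncated
	// away.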

	// Sync() fails for read-only files on Windows.
	if !t.readonly {
		// Ensure all reparation changes have been written to disk
		if err := t.index.Sync(); err != nil {
			return err
		}
		if err := t.head.Sync(); err != nil {
			return err
		}
		if err := t.meta.Sync(); err != nil {
			return err
		}
	}
	// Update the item and byte counters and return
	t.items = t.itemOffset + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file
	t.headBytes = contentSize
	t.headId = lastIndex.filenum

	// Delete the leftover files because of head deletion
	t.releaseFilesAfter(t.headId, true)

	// Delete the leftover files because of tail deletion
	t.releaseFilesBefore(t.tailId, true)

	// Close opened files and preopen all files
	if err := t.preopen(); err != nil {
		return err
	}
	if verbose {
		t.logger.Info("Chain freezer table opened", "items", t.items, "size", t.headBytes)
	} else {
		t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes))
	}
	return nil
}

// preopen opens all files that the freezer will need. This method should be called
// from an init-context, since it assumes that it doesn't have to bother with locking.
// The rationale for doing preopen is to not have to do it from within Retrieve,
// thus not needing to ever obtain a write-lock within Retrieve.
func (t *freezerTable) preopen() (err error) {
	// The repair might have already opened (some) files
	t.releaseFilesAfter(0, false)

	// Open all except head in RDONLY
	for i := t.tailId; i < t.headId; i++ {
		if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil {
			return err
		}
	}
	if t.readonly {
		t.head, err = t.openFile(t.headId, openFreezerFileForReadOnly)
	} else {
		// Open head in read/write
		t.head, err = t.openFile(t.headId, openFreezerFileForAppend)
	}
	return err
}
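
// Item numbering sketch: items, itemOffset and itemHidden are absolute item counts.
// With, say, itemOffset=100 (items physically deleted from the tail), itemHidden=110
// (items logically hidden) and items=150, the table can answer queries for item
// numbers 110..149; truncateHead(120) would drop items 120..149, while
// truncateTail(130) would hide items 110..129, only deleting whole data files once
// every item in them is hidden.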

// truncateHead discards any recent data above the provided threshold number.
func (t *freezerTable) truncateHead(items uint64) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Ensure the given truncate target falls in the correct range
	existing := atomic.LoadUint64(&t.items)
	if existing <= items {
		return nil
	}
	if items < atomic.LoadUint64(&t.itemHidden) {
		return errors.New("truncation below tail")
	}
	// We need to truncate, save the old size for metrics tracking
	oldSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	// Something's out of sync, truncate the table's offset index
	log := t.logger.Debug
	if existing > items+1 {
		log = t.logger.Warn // Only loud warn if we delete multiple items
	}
	log("Truncating freezer table", "items", existing, "limit", items)

	// Truncate the index file first, the tail position is also considered
	// when calculating the new freezer table length.
	length := items - atomic.LoadUint64(&t.itemOffset)
	if err := truncateFreezerFile(t.index, int64(length+1)*indexEntrySize); err != nil {
		return err
	}
	// Calculate the new expected size of the data file and truncate it
	var expected indexEntry
	if length == 0 {
		expected = indexEntry{filenum: t.tailId, offset: 0}
	} else {
		buffer := make([]byte, indexEntrySize)
		if _, err := t.index.ReadAt(buffer, int64(length*indexEntrySize)); err != nil {
			return err
		}
		expected.unmarshalBinary(buffer)
	}
	// We might need to truncate back to older files
	if expected.filenum != t.headId {
		// If already open for reading, force-reopen for writing
		t.releaseFile(expected.filenum)
		newHead, err := t.openFile(expected.filenum, openFreezerFileForAppend)
		if err != nil {
			return err
		}
		// Release any files _after the current head -- both the previous head
		// and any files which may have been opened for reading
		t.releaseFilesAfter(expected.filenum, true)
		// Set back the historic head
		t.head = newHead
		t.headId = expected.filenum
	}
	if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil {
		return err
	}
	// All data files truncated, set internal counters and return
	t.headBytes = int64(expected.offset)
	atomic.StoreUint64(&t.items, items)

	// Retrieve the new size and update the total size counter
	newSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	t.sizeGauge.Dec(int64(oldSize - newSize))
	return nil
}
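
// Index arithmetic sketch: the index file always holds one more entry than the
// number of items it describes, so after truncateHead the file is cut to
// (items-itemOffset+1)*indexEntrySize bytes. With a hypothetical itemOffset of
// 100, truncating the head to 120 items leaves 21 entries (126 bytes) in the
// index file.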

// truncateTail discards any data below the provided threshold number, i.e. it
// drops items from the tail of the table.
func (t *freezerTable) truncateTail(items uint64) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Ensure the given truncate target falls in the correct range
	if atomic.LoadUint64(&t.itemHidden) >= items {
		return nil
	}
	if atomic.LoadUint64(&t.items) < items {
		return errors.New("truncation above head")
	}
	// Load the new tail index by the given new tail position
	var (
		newTailId uint32
		buffer    = make([]byte, indexEntrySize)
	)
	if atomic.LoadUint64(&t.items) == items {
		newTailId = t.headId
	} else {
		offset := items - atomic.LoadUint64(&t.itemOffset)
		if _, err := t.index.ReadAt(buffer, int64((offset+1)*indexEntrySize)); err != nil {
			return err
		}
		var newTail indexEntry
		newTail.unmarshalBinary(buffer)
		newTailId = newTail.filenum
	}
	// Update the virtual tail marker and hide these entries in the table.
	atomic.StoreUint64(&t.itemHidden, items)
	if err := writeMetadata(t.meta, newMetadata(items)); err != nil {
		return err
	}
	// If the hidden items still fall within the current tail file, no data file
	// can be dropped.
	if t.tailId == newTailId {
		return nil
	}
	// The hidden items fall in an invalid range; return an error.
	if t.tailId > newTailId {
		return fmt.Errorf("invalid index, tail-file %d, item-file %d", t.tailId, newTailId)
	}
	// The hidden items exceed the current tail file, drop the relevant data
	// files. We need to truncate, save the old size for metrics tracking.
	oldSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	// Count how many items can be deleted from the file.
	var (
		newDeleted = items
		deleted    = atomic.LoadUint64(&t.itemOffset)
	)
	for current := items - 1; current >= deleted; current -= 1 {
		if _, err := t.index.ReadAt(buffer, int64((current-deleted+1)*indexEntrySize)); err != nil {
			return err
		}
		var pre indexEntry
		pre.unmarshalBinary(buffer)
		if pre.filenum != newTailId {
			break
		}
		newDeleted = current
	}
	// Commit the metadata changes first, before manipulating the index file.
	if err := t.meta.Sync(); err != nil {
		return err
	}
	// Truncate the deleted index entries from the index file.
	err = copyFrom(t.index.Name(), t.index.Name(), indexEntrySize*(newDeleted-deleted+1), func(f *os.File) error {
		tailIndex := indexEntry{
			filenum: newTailId,
			offset:  uint32(newDeleted),
		}
		_, err := f.Write(tailIndex.append(nil))
		return err
	})
	if err != nil {
		return err
	}
	// Reopen the modified index file to load the changes
	if err := t.index.Close(); err != nil {
		return err
	}
	t.index, err = openFreezerFileForAppend(t.index.Name())
	if err != nil {
		return err
	}
	// Release any files before the current tail
	t.tailId = newTailId
	atomic.StoreUint64(&t.itemOffset, newDeleted)
	t.releaseFilesBefore(t.tailId, true)

	// Retrieve the new size and update the total size counter
	newSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	t.sizeGauge.Dec(int64(oldSize - newSize))
	return nil
}

// Close closes all opened files.
func (t *freezerTable) Close() error {
	t.lock.Lock()
	defer t.lock.Unlock()

	var errs []error
	doClose := func(f *os.File, sync bool, close bool) {
		if sync && !t.readonly {
			if err := f.Sync(); err != nil {
				errs = append(errs, err)
			}
		}
		if close {
			if err := f.Close(); err != nil {
				errs = append(errs, err)
			}
		}
	}
	// Trying to fsync a file opened in rdonly causes "Access denied"
	// error on Windows.
	doClose(t.index, true, true)
	doClose(t.meta, true, true)
	// The preopened non-head data-files are all opened in readonly.
	// The head is opened in rw-mode, so we sync it here - but since it's also
	// part of t.files, it will be closed in the loop below.
	doClose(t.head, true, false) // sync but do not close
	for _, f := range t.files {
		doClose(f, false, true) // close but do not sync
	}
	t.index = nil
	t.meta = nil
	t.head = nil

	if errs != nil {
		return fmt.Errorf("%v", errs)
	}
	return nil
}

// openFile assumes that the write-lock is held by the caller.
func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) {
	var exist bool
	if f, exist = t.files[num]; !exist {
		var name string
		if t.noCompression {
			name = fmt.Sprintf("%s.%04d.rdat", t.name, num)
		} else {
			name = fmt.Sprintf("%s.%04d.cdat", t.name, num)
		}
		f, err = opener(filepath.Join(t.path, name))
		if err != nil {
			return nil, err
		}
		t.files[num] = f
	}
	return f, err
}

// releaseFile closes a file, and removes it from the open file cache.
// Assumes that the caller holds the write lock.
func (t *freezerTable) releaseFile(num uint32) {
	if f, exist := t.files[num]; exist {
		delete(t.files, num)
		f.Close()
	}
}

// releaseFilesAfter closes all open files with a higher number, and optionally also deletes the files
func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) {
	for fnum, f := range t.files {
		if fnum > num {
			delete(t.files, fnum)
			f.Close()
			if remove {
				os.Remove(f.Name())
			}
		}
	}
}

// releaseFilesBefore closes all open files with a lower number, and optionally also deletes the files
func (t *freezerTable) releaseFilesBefore(num uint32, remove bool) {
	for fnum, f := range t.files {
		if fnum < num {
			delete(t.files, fnum)
			f.Close()
			if remove {
				os.Remove(f.Name())
			}
		}
	}
}

// getIndices returns the index entries for the given from-item, covering 'count' items.
// N.B: The actual number of returned indices for N items will always be N+1 (unless an
// error is returned).
// OBS: This method assumes that the caller has already verified (and/or trimmed) the range
// so that the items are within bounds. If this method is used to read out of bounds,
// it will return an error.
func (t *freezerTable) getIndices(from, count uint64) ([]*indexEntry, error) {
	// Apply the table-offset
	from = from - t.itemOffset
	// For reading N items, we need N+1 indices.
	buffer := make([]byte, (count+1)*indexEntrySize)
	if _, err := t.index.ReadAt(buffer, int64(from*indexEntrySize)); err != nil {
		return nil, err
	}
	var (
		indices []*indexEntry
		offset  int
	)
	for i := from; i <= from+count; i++ {
		index := new(indexEntry)
		index.unmarshalBinary(buffer[offset:])
		offset += indexEntrySize
		indices = append(indices, index)
	}
	if from == 0 {
		// Special case if we're reading the first item in the freezer. We assume that
		// the first item always starts at offset zero within its data file (since we
		// only support deletion at the file level, this assumption holds). This means
		// the first index entry is free to carry the 'global' offset information used
		// for the deletion-case.
		indices[0].offset = 0
		indices[0].filenum = indices[1].filenum
	}
	return indices, nil
}
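
// For example, a hypothetical getIndices(5, 2) call on a table with a zero
// itemOffset reads entries 5, 6 and 7 from the index file: entries 5 and 6
// bound item 5, entries 6 and 7 bound item 6, which is why N items always
// require N+1 index entries.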

// Retrieve looks up the data offset of an item with the given number and retrieves
// the raw binary blob from the data file.
func (t *freezerTable) Retrieve(item uint64) ([]byte, error) {
	items, err := t.RetrieveItems(item, 1, 0)
	if err != nil {
		return nil, err
	}
	return items[0], nil
}

// RetrieveItems returns multiple items in sequence, starting from the index 'start'.
// It will return at most 'count' items, but will abort earlier to respect the
// 'maxBytes' argument. However, if 'maxBytes' is smaller than the size of one
// item, it _will_ return one element and possibly overflow the maxBytes.
func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, error) {
	// First we read the 'raw' data, which might be compressed.
	diskData, sizes, err := t.retrieveItems(start, count, maxBytes)
	if err != nil {
		return nil, err
	}
	var (
		output     = make([][]byte, 0, count)
		offset     int // offset for reading
		outputSize int // size of uncompressed data
	)
	// Now slice up the data and decompress.
	for i, diskSize := range sizes {
		item := diskData[offset : offset+diskSize]
		offset += diskSize
		decompressedSize := diskSize
		if !t.noCompression {
			decompressedSize, _ = snappy.DecodedLen(item)
		}
		if i > 0 && uint64(outputSize+decompressedSize) > maxBytes {
			break
		}
		if !t.noCompression {
			data, err := snappy.Decode(nil, item)
			if err != nil {
				return nil, err
			}
			output = append(output, data)
		} else {
			output = append(output, item)
		}
		outputSize += decompressedSize
	}
	return output, nil
}
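
// A minimal read sketch (the item numbers and the 1 KiB budget are hypothetical):
//
//	blobs, err := table.RetrieveItems(110, 5, 1024)
//
// returns up to five consecutive items starting at number 110, stopping early once
// the decompressed output would exceed 1024 bytes; the first item is always
// returned, however large it is.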

// retrieveItems reads up to 'count' items from the table. It reads at least
// one item, but otherwise avoids reading more than maxBytes bytes.
// It returns the (potentially compressed) data, and the sizes.
func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []int, error) {
	t.lock.RLock()
	defer t.lock.RUnlock()

	// Ensure the table and the item are accessible
	if t.index == nil || t.head == nil || t.meta == nil {
		return nil, nil, errClosed
	}
	var (
		items  = atomic.LoadUint64(&t.items)      // the total number of items (highest item number + 1)
		hidden = atomic.LoadUint64(&t.itemHidden) // the number of hidden items
	)
	// Ensure the start is written, not deleted from the tail, and that the
	// caller actually wants something
	if items <= start || hidden > start || count == 0 {
		return nil, nil, errOutOfBounds
	}
	if start+count > items {
		count = items - start
	}
	var (
		output     = make([]byte, maxBytes) // Buffer to read data into
		outputSize int                      // Used size of that buffer
	)
	// readData is a helper method to read a single data item from disk.
	readData := func(fileId, start uint32, length int) error {
		// In case a small limit is used, and the elements are large, we may need to
		// realloc the read-buffer when reading the first (and only) item.
		if len(output) < length {
			output = make([]byte, length)
		}
		dataFile, exist := t.files[fileId]
		if !exist {
			return fmt.Errorf("missing data file %d", fileId)
		}
		if _, err := dataFile.ReadAt(output[outputSize:outputSize+length], int64(start)); err != nil {
			return err
		}
		outputSize += length
		return nil
	}
	// Read all the indexes in one go
	indices, err := t.getIndices(start, count)
	if err != nil {
		return nil, nil, err
	}
	var (
		sizes      []int               // The sizes for each element
		totalSize  = 0                 // The total size of all data read so far
		readStart  = indices[0].offset // Where, in the file, to start reading
		unreadSize = 0                 // The size of the as-yet-unread data
	)

	for i, firstIndex := range indices[:len(indices)-1] {
		secondIndex := indices[i+1]
		// Determine the size of the item.
		offset1, offset2, _ := firstIndex.bounds(secondIndex)
		size := int(offset2 - offset1)
		// Crossing a file boundary?
		if secondIndex.filenum != firstIndex.filenum {
			// If we have unread data in the first file, we need to do that read now.
			if unreadSize > 0 {
				if err := readData(firstIndex.filenum, readStart, unreadSize); err != nil {
					return nil, nil, err
				}
				unreadSize = 0
			}
			readStart = 0
		}
		if i > 0 && uint64(totalSize+size) > maxBytes {
			// About to break out due to the byte limit being exceeded. We don't
			// read this last item, but we need to do the deferred reads now.
			if unreadSize > 0 {
				if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil {
					return nil, nil, err
				}
			}
			break
		}
		// Defer the read for later
		unreadSize += size
		totalSize += size
		sizes = append(sizes, size)
		if i == len(indices)-2 || uint64(totalSize) > maxBytes {
			// Last item, need to do the read now
			if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil {
				return nil, nil, err
			}
			break
		}
	}
	return output[:outputSize], sizes, nil
}

// has reports whether the item with the given number is still accessible in the
// freezer table.
func (t *freezerTable) has(number uint64) bool {
	return atomic.LoadUint64(&t.items) > number && atomic.LoadUint64(&t.itemHidden) <= number
}

// size returns the total data size in the freezer table.
func (t *freezerTable) size() (uint64, error) {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.sizeNolock()
}

// sizeNolock returns the total data size in the freezer table without obtaining
// the mutex first.
func (t *freezerTable) sizeNolock() (uint64, error) {
	stat, err := t.index.Stat()
	if err != nil {
		return 0, err
	}
	total := uint64(t.maxFileSize)*uint64(t.headId-t.tailId) + uint64(t.headBytes) + uint64(stat.Size())
	return total, nil
}
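
// Note that sizeNolock estimates rather than measures: every non-head data file is
// counted at the full maxFileSize. For a hypothetical table with a 2 MB maxFileSize,
// tailId=3, headId=5, 1.2 MB in the head file and a 600 byte index, the reported
// size is 2*2 MB + 1.2 MB + 600 bytes.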

// advanceHead should be called when the current head file would outgrow the file
// limits, and a new file must be opened. It acquires the write-lock itself, so the
// caller must not hold it.
func (t *freezerTable) advanceHead() error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// We open the next file in truncated mode -- if this file already
	// exists, we need to start over from scratch on it.
	nextID := t.headId + 1
	newHead, err := t.openFile(nextID, openFreezerFileTruncated)
	if err != nil {
		return err
	}
	// Commit the contents of the old file to stable storage and
	// tear it down. It will be re-opened in read-only mode.
	if err := t.head.Sync(); err != nil {
		return err
	}
	t.releaseFile(t.headId)
	t.openFile(t.headId, openFreezerFileForReadOnly)

	// Swap out the current head.
	t.head = newHead
	t.headBytes = 0
	t.headId = nextID
	return nil
}

// Sync pushes any pending data from memory out to disk. This is an expensive
// operation, so use it with care.
func (t *freezerTable) Sync() error {
	t.lock.Lock()
	defer t.lock.Unlock()
	if t.index == nil || t.head == nil || t.meta == nil {
		return errClosed
	}
	var err error
	trackError := func(e error) {
		if e != nil && err == nil {
			err = e
		}
	}

	trackError(t.index.Sync())
	trackError(t.meta.Sync())
	trackError(t.head.Sync())
	return err
}

// dumpIndexStdout prints the index entries between start and stop to stdout.
func (t *freezerTable) dumpIndexStdout(start, stop int64) {
	t.dumpIndex(os.Stdout, start, stop)
}

// dumpIndexString returns the index entries between start and stop as a string.
func (t *freezerTable) dumpIndexString(start, stop int64) string {
	var out bytes.Buffer
	out.WriteString("\n")
	t.dumpIndex(&out, start, stop)
	return out.String()
}

// dumpIndex writes a human readable dump of the index entries between start and
// stop to the given writer.
func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
	meta, err := readMetadata(t.meta)
	if err != nil {
		fmt.Fprintf(w, "Failed to decode freezer table %v\n", err)
		return
	}
	fmt.Fprintf(w, "Version %d count %d, deleted %d, hidden %d\n", meta.Version,
		atomic.LoadUint64(&t.items), atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden))

	buf := make([]byte, indexEntrySize)

	fmt.Fprintf(w, "| number | fileno | offset |\n")
	fmt.Fprintf(w, "|--------|--------|--------|\n")

	for i := uint64(start); ; i++ {
		if _, err := t.index.ReadAt(buf, int64((i+1)*indexEntrySize)); err != nil {
			break
		}
		var entry indexEntry
		entry.unmarshalBinary(buf)
		fmt.Fprintf(w, "| %03d | %03d | %03d | \n", i, entry.filenum, entry.offset)
		if stop > 0 && i >= uint64(stop) {
			break
		}
	}
	fmt.Fprintf(w, "|--------------------------|\n")
}