// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package rawdb

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"

	"github.com/ethw3/go-ethereuma/common"
	"github.com/ethw3/go-ethereuma/log"
	"github.com/ethw3/go-ethereuma/metrics"
	"github.com/golang/snappy"
)

var (
	// errClosed is returned if an operation attempts to read from or write to the
	// freezer table after it has already been closed.
	errClosed = errors.New("closed")

	// errOutOfBounds is returned if the item requested is not contained within the
	// freezer table (either above the head or below the hidden tail).
	errOutOfBounds = errors.New("out of bounds")

	// errNotSupported is returned if the database doesn't support the required operation.
	errNotSupported = errors.New("this operation is not supported")
)

// indexEntry contains the number/id of the file that the data resides in, as well as the
// offset within the file to the end of the data.
// In serialized form, the filenum is stored as uint16.
52 type indexEntry struct { 53 filenum uint32 // stored as uint16 ( 2 bytes ) 54 offset uint32 // stored as uint32 ( 4 bytes ) 55 } 56 57 const indexEntrySize = 6 58 59 // unmarshalBinary deserializes binary b into the rawIndex entry. 60 func (i *indexEntry) unmarshalBinary(b []byte) { 61 i.filenum = uint32(binary.BigEndian.Uint16(b[:2])) 62 i.offset = binary.BigEndian.Uint32(b[2:6]) 63 } 64 65 // append adds the encoded entry to the end of b. 66 func (i *indexEntry) append(b []byte) []byte { 67 offset := len(b) 68 out := append(b, make([]byte, indexEntrySize)...) 69 binary.BigEndian.PutUint16(out[offset:], uint16(i.filenum)) 70 binary.BigEndian.PutUint32(out[offset+2:], i.offset) 71 return out 72 } 73 74 // bounds returns the start- and end- offsets, and the file number of where to 75 // read there data item marked by the two index entries. The two entries are 76 // assumed to be sequential. 77 func (i *indexEntry) bounds(end *indexEntry) (startOffset, endOffset, fileId uint32) { 78 if i.filenum != end.filenum { 79 // If a piece of data 'crosses' a data-file, 80 // it's actually in one piece on the second data-file. 81 // We return a zero-indexEntry for the second file as start 82 return 0, end.offset, end.filenum 83 } 84 return i.offset, end.offset, end.filenum 85 } 86 87 // freezerTable represents a single chained data table within the freezer (e.g. blocks). 88 // It consists of a data file (snappy encoded arbitrary data blobs) and an indexEntry 89 // file (uncompressed 64 bit indices into the data file). 90 type freezerTable struct { 91 // WARNING: The `items` field is accessed atomically. On 32 bit platforms, only 92 // 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned, 93 // so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG). 
94 items uint64 // Number of items stored in the table (including items removed from tail) 95 itemOffset uint64 // Number of items removed from the table 96 97 // itemHidden is the number of items marked as deleted. Tail deletion is 98 // only supported at file level which means the actual deletion will be 99 // delayed until the entire data file is marked as deleted. Before that 100 // these items will be hidden to prevent being visited again. The value 101 // should never be lower than itemOffset. 102 itemHidden uint64 103 104 noCompression bool // if true, disables snappy compression. Note: does not work retroactively 105 readonly bool 106 maxFileSize uint32 // Max file size for data-files 107 name string 108 path string 109 110 head *os.File // File descriptor for the data head of the table 111 index *os.File // File descriptor for the indexEntry file of the table 112 meta *os.File // File descriptor for metadata of the table 113 files map[uint32]*os.File // open files 114 headId uint32 // number of the currently active head file 115 tailId uint32 // number of the earliest file 116 117 headBytes int64 // Number of bytes written to the head file 118 readMeter metrics.Meter // Meter for measuring the effective amount of data read 119 writeMeter metrics.Meter // Meter for measuring the effective amount of data written 120 sizeGauge metrics.Gauge // Gauge for tracking the combined size of all freezer tables 121 122 logger log.Logger // Logger with database path and table name embedded 123 lock sync.RWMutex // Mutex protecting the data file descriptors 124 } 125 126 // newFreezerTable opens the given path as a freezer table. 
127 func newFreezerTable(path, name string, disableSnappy, readonly bool) (*freezerTable, error) { 128 return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy, readonly) 129 } 130 131 // newTable opens a freezer table, creating the data and index files if they are 132 // non-existent. Both files are truncated to the shortest common length to ensure 133 // they don't go out of sync. 134 func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) { 135 // Ensure the containing directory exists and open the indexEntry file 136 if err := os.MkdirAll(path, 0755); err != nil { 137 return nil, err 138 } 139 var idxName string 140 if noCompression { 141 idxName = fmt.Sprintf("%s.ridx", name) // raw index file 142 } else { 143 idxName = fmt.Sprintf("%s.cidx", name) // compressed index file 144 } 145 var ( 146 err error 147 index *os.File 148 meta *os.File 149 ) 150 if readonly { 151 // Will fail if table doesn't exist 152 index, err = openFreezerFileForReadOnly(filepath.Join(path, idxName)) 153 if err != nil { 154 return nil, err 155 } 156 // TODO(rjl493456442) change it to read-only mode. Open the metadata file 157 // in rw mode. It's a temporary solution for now and should be changed 158 // whenever the tail deletion is actually used. The reason for this hack is 159 // the additional meta file for each freezer table is added in order to support 160 // tail deletion, but for most legacy nodes this file is missing. This check 161 // will suddenly break lots of database relevant commands. So the metadata file 162 // is always opened for mutation and nothing else will be written except 163 // the initialization. 
164 meta, err = openFreezerFileForAppend(filepath.Join(path, fmt.Sprintf("%s.meta", name))) 165 if err != nil { 166 return nil, err 167 } 168 } else { 169 index, err = openFreezerFileForAppend(filepath.Join(path, idxName)) 170 if err != nil { 171 return nil, err 172 } 173 meta, err = openFreezerFileForAppend(filepath.Join(path, fmt.Sprintf("%s.meta", name))) 174 if err != nil { 175 return nil, err 176 } 177 } 178 // Create the table and repair any past inconsistency 179 tab := &freezerTable{ 180 index: index, 181 meta: meta, 182 files: make(map[uint32]*os.File), 183 readMeter: readMeter, 184 writeMeter: writeMeter, 185 sizeGauge: sizeGauge, 186 name: name, 187 path: path, 188 logger: log.New("database", path, "table", name), 189 noCompression: noCompression, 190 readonly: readonly, 191 maxFileSize: maxFilesize, 192 } 193 if err := tab.repair(); err != nil { 194 tab.Close() 195 return nil, err 196 } 197 // Initialize the starting size counter 198 size, err := tab.sizeNolock() 199 if err != nil { 200 tab.Close() 201 return nil, err 202 } 203 tab.sizeGauge.Inc(int64(size)) 204 205 return tab, nil 206 } 207 208 // repair cross-checks the head and the index file and truncates them to 209 // be in sync with each other after a potential crash / data loss. 
210 func (t *freezerTable) repair() error { 211 // Create a temporary offset buffer to init files with and read indexEntry into 212 buffer := make([]byte, indexEntrySize) 213 214 // If we've just created the files, initialize the index with the 0 indexEntry 215 stat, err := t.index.Stat() 216 if err != nil { 217 return err 218 } 219 if stat.Size() == 0 { 220 if _, err := t.index.Write(buffer); err != nil { 221 return err 222 } 223 } 224 // Ensure the index is a multiple of indexEntrySize bytes 225 if overflow := stat.Size() % indexEntrySize; overflow != 0 { 226 truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path 227 } 228 // Retrieve the file sizes and prepare for truncation 229 if stat, err = t.index.Stat(); err != nil { 230 return err 231 } 232 offsetsSize := stat.Size() 233 234 // Open the head file 235 var ( 236 firstIndex indexEntry 237 lastIndex indexEntry 238 contentSize int64 239 contentExp int64 240 ) 241 // Read index zero, determine what file is the earliest 242 // and what item offset to use 243 t.index.ReadAt(buffer, 0) 244 firstIndex.unmarshalBinary(buffer) 245 246 // Assign the tail fields with the first stored index. 247 // The total removed items is represented with an uint32, 248 // which is not enough in theory but enough in practice. 249 // TODO: use uint64 to represent total removed items. 
250 t.tailId = firstIndex.filenum 251 t.itemOffset = uint64(firstIndex.offset) 252 253 // Load metadata from the file 254 meta, err := loadMetadata(t.meta, t.itemOffset) 255 if err != nil { 256 return err 257 } 258 t.itemHidden = meta.VirtualTail 259 260 // Read the last index, use the default value in case the freezer is empty 261 if offsetsSize == indexEntrySize { 262 lastIndex = indexEntry{filenum: t.tailId, offset: 0} 263 } else { 264 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 265 lastIndex.unmarshalBinary(buffer) 266 } 267 if t.readonly { 268 t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForReadOnly) 269 } else { 270 t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend) 271 } 272 if err != nil { 273 return err 274 } 275 if stat, err = t.head.Stat(); err != nil { 276 return err 277 } 278 contentSize = stat.Size() 279 280 // Keep truncating both files until they come in sync 281 contentExp = int64(lastIndex.offset) 282 for contentExp != contentSize { 283 // Truncate the head file to the last offset pointer 284 if contentExp < contentSize { 285 t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) 286 if err := truncateFreezerFile(t.head, contentExp); err != nil { 287 return err 288 } 289 contentSize = contentExp 290 } 291 // Truncate the index to point within the head file 292 if contentExp > contentSize { 293 t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) 294 if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil { 295 return err 296 } 297 offsetsSize -= indexEntrySize 298 299 // Read the new head index, use the default value in case 300 // the freezer is already empty. 
301 var newLastIndex indexEntry 302 if offsetsSize == indexEntrySize { 303 newLastIndex = indexEntry{filenum: t.tailId, offset: 0} 304 } else { 305 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 306 newLastIndex.unmarshalBinary(buffer) 307 } 308 // We might have slipped back into an earlier head-file here 309 if newLastIndex.filenum != lastIndex.filenum { 310 // Release earlier opened file 311 t.releaseFile(lastIndex.filenum) 312 if t.head, err = t.openFile(newLastIndex.filenum, openFreezerFileForAppend); err != nil { 313 return err 314 } 315 if stat, err = t.head.Stat(); err != nil { 316 // TODO, anything more we can do here? 317 // A data file has gone missing... 318 return err 319 } 320 contentSize = stat.Size() 321 } 322 lastIndex = newLastIndex 323 contentExp = int64(lastIndex.offset) 324 } 325 } 326 // Sync() fails for read-only files on windows. 327 if !t.readonly { 328 // Ensure all reparation changes have been written to disk 329 if err := t.index.Sync(); err != nil { 330 return err 331 } 332 if err := t.head.Sync(); err != nil { 333 return err 334 } 335 if err := t.meta.Sync(); err != nil { 336 return err 337 } 338 } 339 // Update the item and byte counters and return 340 t.items = t.itemOffset + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file 341 t.headBytes = contentSize 342 t.headId = lastIndex.filenum 343 344 // Delete the leftover files because of head deletion 345 t.releaseFilesAfter(t.headId, true) 346 347 // Delete the leftover files because of tail deletion 348 t.releaseFilesBefore(t.tailId, true) 349 350 // Close opened files and preopen all files 351 if err := t.preopen(); err != nil { 352 return err 353 } 354 t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes)) 355 return nil 356 } 357 358 // preopen opens all files that the freezer will need. 
This method should be called from an init-context, 359 // since it assumes that it doesn't have to bother with locking 360 // The rationale for doing preopen is to not have to do it from within Retrieve, thus not needing to ever 361 // obtain a write-lock within Retrieve. 362 func (t *freezerTable) preopen() (err error) { 363 // The repair might have already opened (some) files 364 t.releaseFilesAfter(0, false) 365 366 // Open all except head in RDONLY 367 for i := t.tailId; i < t.headId; i++ { 368 if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil { 369 return err 370 } 371 } 372 if t.readonly { 373 t.head, err = t.openFile(t.headId, openFreezerFileForReadOnly) 374 } else { 375 // Open head in read/write 376 t.head, err = t.openFile(t.headId, openFreezerFileForAppend) 377 } 378 return err 379 } 380 381 // truncateHead discards any recent data above the provided threshold number. 382 func (t *freezerTable) truncateHead(items uint64) error { 383 t.lock.Lock() 384 defer t.lock.Unlock() 385 386 // Ensure the given truncate target falls in the correct range 387 existing := atomic.LoadUint64(&t.items) 388 if existing <= items { 389 return nil 390 } 391 if items < atomic.LoadUint64(&t.itemHidden) { 392 return errors.New("truncation below tail") 393 } 394 // We need to truncate, save the old size for metrics tracking 395 oldSize, err := t.sizeNolock() 396 if err != nil { 397 return err 398 } 399 // Something's out of sync, truncate the table's offset index 400 log := t.logger.Debug 401 if existing > items+1 { 402 log = t.logger.Warn // Only loud warn if we delete multiple items 403 } 404 log("Truncating freezer table", "items", existing, "limit", items) 405 406 // Truncate the index file first, the tail position is also considered 407 // when calculating the new freezer table length. 
408 length := items - atomic.LoadUint64(&t.itemOffset) 409 if err := truncateFreezerFile(t.index, int64(length+1)*indexEntrySize); err != nil { 410 return err 411 } 412 // Calculate the new expected size of the data file and truncate it 413 var expected indexEntry 414 if length == 0 { 415 expected = indexEntry{filenum: t.tailId, offset: 0} 416 } else { 417 buffer := make([]byte, indexEntrySize) 418 if _, err := t.index.ReadAt(buffer, int64(length*indexEntrySize)); err != nil { 419 return err 420 } 421 expected.unmarshalBinary(buffer) 422 } 423 // We might need to truncate back to older files 424 if expected.filenum != t.headId { 425 // If already open for reading, force-reopen for writing 426 t.releaseFile(expected.filenum) 427 newHead, err := t.openFile(expected.filenum, openFreezerFileForAppend) 428 if err != nil { 429 return err 430 } 431 // Release any files _after the current head -- both the previous head 432 // and any files which may have been opened for reading 433 t.releaseFilesAfter(expected.filenum, true) 434 // Set back the historic head 435 t.head = newHead 436 t.headId = expected.filenum 437 } 438 if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil { 439 return err 440 } 441 // All data files truncated, set internal counters and return 442 t.headBytes = int64(expected.offset) 443 atomic.StoreUint64(&t.items, items) 444 445 // Retrieve the new size and update the total size counter 446 newSize, err := t.sizeNolock() 447 if err != nil { 448 return err 449 } 450 t.sizeGauge.Dec(int64(oldSize - newSize)) 451 return nil 452 } 453 454 // truncateTail discards any recent data before the provided threshold number. 
func (t *freezerTable) truncateTail(items uint64) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Ensure the given truncate target falls in the correct range:
	// a no-op if the tail is already at or past the target, an error
	// if the target lies beyond the head.
	if atomic.LoadUint64(&t.itemHidden) >= items {
		return nil
	}
	if atomic.LoadUint64(&t.items) < items {
		return errors.New("truncation above head")
	}
	// Load the new tail index by the given new tail position
	var (
		newTailId uint32
		buffer    = make([]byte, indexEntrySize)
	)
	if atomic.LoadUint64(&t.items) == items {
		// Everything gets hidden: the new tail file is the head file.
		newTailId = t.headId
	} else {
		// Read the index entry of the first item that stays visible; the
		// +1 skips the initial sentinel entry at the start of the index.
		offset := items - atomic.LoadUint64(&t.itemOffset)
		if _, err := t.index.ReadAt(buffer, int64((offset+1)*indexEntrySize)); err != nil {
			return err
		}
		var newTail indexEntry
		newTail.unmarshalBinary(buffer)
		newTailId = newTail.filenum
	}
	// Update the virtual tail marker and hidden these entries in table.
	// Note: the marker is persisted via the metadata file below before
	// any index manipulation takes place.
	atomic.StoreUint64(&t.itemHidden, items)
	if err := writeMetadata(t.meta, newMetadata(items)); err != nil {
		return err
	}
	// Hidden items still fall in the current tail file, no data file
	// can be dropped.
	if t.tailId == newTailId {
		return nil
	}
	// Hidden items fall in the incorrect range, returns the error.
	if t.tailId > newTailId {
		return fmt.Errorf("invalid index, tail-file %d, item-file %d", t.tailId, newTailId)
	}
	// Hidden items exceed the current tail file, drop the relevant
	// data files. We need to truncate, save the old size for metrics
	// tracking.
	oldSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	// Count how many items can be deleted from the file.
	// Walk backwards from the truncation point while the entries still
	// live in the new tail file; everything before `newDeleted` is in
	// older files and can be physically removed.
	var (
		newDeleted = items
		deleted    = atomic.LoadUint64(&t.itemOffset)
	)
	for current := items - 1; current >= deleted; current -= 1 {
		if _, err := t.index.ReadAt(buffer, int64((current-deleted+1)*indexEntrySize)); err != nil {
			return err
		}
		var pre indexEntry
		pre.unmarshalBinary(buffer)
		if pre.filenum != newTailId {
			break
		}
		newDeleted = current
	}
	// Commit the changes of metadata file first before manipulating
	// the indexes file.
	if err := t.meta.Sync(); err != nil {
		return err
	}
	// Truncate the deleted index entries from the index file.
	// copyFrom rewrites the index in place, prepending a fresh sentinel
	// entry that encodes the new tail file and deletion count.
	err = copyFrom(t.index.Name(), t.index.Name(), indexEntrySize*(newDeleted-deleted+1), func(f *os.File) error {
		tailIndex := indexEntry{
			filenum: newTailId,
			offset:  uint32(newDeleted),
		}
		_, err := f.Write(tailIndex.append(nil))
		return err
	})
	if err != nil {
		return err
	}
	// Reopen the modified index file to load the changes
	if err := t.index.Close(); err != nil {
		return err
	}
	t.index, err = openFreezerFileForAppend(t.index.Name())
	if err != nil {
		return err
	}
	// Release any files before the current tail
	t.tailId = newTailId
	atomic.StoreUint64(&t.itemOffset, newDeleted)
	t.releaseFilesBefore(t.tailId, true)

	// Retrieve the new size and update the total size counter
	newSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	t.sizeGauge.Dec(int64(oldSize - newSize))
	return nil
}

// Close closes all opened files.
559 func (t *freezerTable) Close() error { 560 t.lock.Lock() 561 defer t.lock.Unlock() 562 563 var errs []error 564 if err := t.index.Close(); err != nil { 565 errs = append(errs, err) 566 } 567 t.index = nil 568 569 if err := t.meta.Close(); err != nil { 570 errs = append(errs, err) 571 } 572 t.meta = nil 573 574 for _, f := range t.files { 575 if err := f.Close(); err != nil { 576 errs = append(errs, err) 577 } 578 } 579 t.head = nil 580 581 if errs != nil { 582 return fmt.Errorf("%v", errs) 583 } 584 return nil 585 } 586 587 // openFile assumes that the write-lock is held by the caller 588 func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) { 589 var exist bool 590 if f, exist = t.files[num]; !exist { 591 var name string 592 if t.noCompression { 593 name = fmt.Sprintf("%s.%04d.rdat", t.name, num) 594 } else { 595 name = fmt.Sprintf("%s.%04d.cdat", t.name, num) 596 } 597 f, err = opener(filepath.Join(t.path, name)) 598 if err != nil { 599 return nil, err 600 } 601 t.files[num] = f 602 } 603 return f, err 604 } 605 606 // releaseFile closes a file, and removes it from the open file cache. 
607 // Assumes that the caller holds the write lock 608 func (t *freezerTable) releaseFile(num uint32) { 609 if f, exist := t.files[num]; exist { 610 delete(t.files, num) 611 f.Close() 612 } 613 } 614 615 // releaseFilesAfter closes all open files with a higher number, and optionally also deletes the files 616 func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) { 617 for fnum, f := range t.files { 618 if fnum > num { 619 delete(t.files, fnum) 620 f.Close() 621 if remove { 622 os.Remove(f.Name()) 623 } 624 } 625 } 626 } 627 628 // releaseFilesBefore closes all open files with a lower number, and optionally also deletes the files 629 func (t *freezerTable) releaseFilesBefore(num uint32, remove bool) { 630 for fnum, f := range t.files { 631 if fnum < num { 632 delete(t.files, fnum) 633 f.Close() 634 if remove { 635 os.Remove(f.Name()) 636 } 637 } 638 } 639 } 640 641 // getIndices returns the index entries for the given from-item, covering 'count' items. 642 // N.B: The actual number of returned indices for N items will always be N+1 (unless an 643 // error is returned). 644 // OBS: This method assumes that the caller has already verified (and/or trimmed) the range 645 // so that the items are within bounds. If this method is used to read out of bounds, 646 // it will return error. 647 func (t *freezerTable) getIndices(from, count uint64) ([]*indexEntry, error) { 648 // Apply the table-offset 649 from = from - t.itemOffset 650 // For reading N items, we need N+1 indices. 651 buffer := make([]byte, (count+1)*indexEntrySize) 652 if _, err := t.index.ReadAt(buffer, int64(from*indexEntrySize)); err != nil { 653 return nil, err 654 } 655 var ( 656 indices []*indexEntry 657 offset int 658 ) 659 for i := from; i <= from+count; i++ { 660 index := new(indexEntry) 661 index.unmarshalBinary(buffer[offset:]) 662 offset += indexEntrySize 663 indices = append(indices, index) 664 } 665 if from == 0 { 666 // Special case if we're reading the first item in the freezer. 
We assume that 667 // the first item always start from zero(regarding the deletion, we 668 // only support deletion by files, so that the assumption is held). 669 // This means we can use the first item metadata to carry information about 670 // the 'global' offset, for the deletion-case 671 indices[0].offset = 0 672 indices[0].filenum = indices[1].filenum 673 } 674 return indices, nil 675 } 676 677 // Retrieve looks up the data offset of an item with the given number and retrieves 678 // the raw binary blob from the data file. 679 func (t *freezerTable) Retrieve(item uint64) ([]byte, error) { 680 items, err := t.RetrieveItems(item, 1, 0) 681 if err != nil { 682 return nil, err 683 } 684 return items[0], nil 685 } 686 687 // RetrieveItems returns multiple items in sequence, starting from the index 'start'. 688 // It will return at most 'max' items, but will abort earlier to respect the 689 // 'maxBytes' argument. However, if the 'maxBytes' is smaller than the size of one 690 // item, it _will_ return one element and possibly overflow the maxBytes. 691 func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, error) { 692 // First we read the 'raw' data, which might be compressed. 693 diskData, sizes, err := t.retrieveItems(start, count, maxBytes) 694 if err != nil { 695 return nil, err 696 } 697 var ( 698 output = make([][]byte, 0, count) 699 offset int // offset for reading 700 outputSize int // size of uncompressed data 701 ) 702 // Now slice up the data and decompress. 
703 for i, diskSize := range sizes { 704 item := diskData[offset : offset+diskSize] 705 offset += diskSize 706 decompressedSize := diskSize 707 if !t.noCompression { 708 decompressedSize, _ = snappy.DecodedLen(item) 709 } 710 if i > 0 && uint64(outputSize+decompressedSize) > maxBytes { 711 break 712 } 713 if !t.noCompression { 714 data, err := snappy.Decode(nil, item) 715 if err != nil { 716 return nil, err 717 } 718 output = append(output, data) 719 } else { 720 output = append(output, item) 721 } 722 outputSize += decompressedSize 723 } 724 return output, nil 725 } 726 727 // retrieveItems reads up to 'count' items from the table. It reads at least 728 // one item, but otherwise avoids reading more than maxBytes bytes. 729 // It returns the (potentially compressed) data, and the sizes. 730 func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []int, error) { 731 t.lock.RLock() 732 defer t.lock.RUnlock() 733 734 // Ensure the table and the item are accessible 735 if t.index == nil || t.head == nil { 736 return nil, nil, errClosed 737 } 738 var ( 739 items = atomic.LoadUint64(&t.items) // the total items(head + 1) 740 hidden = atomic.LoadUint64(&t.itemHidden) // the number of hidden items 741 ) 742 // Ensure the start is written, not deleted from the tail, and that the 743 // caller actually wants something 744 if items <= start || hidden > start || count == 0 { 745 return nil, nil, errOutOfBounds 746 } 747 if start+count > items { 748 count = items - start 749 } 750 var ( 751 output = make([]byte, maxBytes) // Buffer to read data into 752 outputSize int // Used size of that buffer 753 ) 754 // readData is a helper method to read a single data item from disk. 755 readData := func(fileId, start uint32, length int) error { 756 // In case a small limit is used, and the elements are large, may need to 757 // realloc the read-buffer when reading the first (and only) item. 
758 if len(output) < length { 759 output = make([]byte, length) 760 } 761 dataFile, exist := t.files[fileId] 762 if !exist { 763 return fmt.Errorf("missing data file %d", fileId) 764 } 765 if _, err := dataFile.ReadAt(output[outputSize:outputSize+length], int64(start)); err != nil { 766 return err 767 } 768 outputSize += length 769 return nil 770 } 771 // Read all the indexes in one go 772 indices, err := t.getIndices(start, count) 773 if err != nil { 774 return nil, nil, err 775 } 776 var ( 777 sizes []int // The sizes for each element 778 totalSize = 0 // The total size of all data read so far 779 readStart = indices[0].offset // Where, in the file, to start reading 780 unreadSize = 0 // The size of the as-yet-unread data 781 ) 782 783 for i, firstIndex := range indices[:len(indices)-1] { 784 secondIndex := indices[i+1] 785 // Determine the size of the item. 786 offset1, offset2, _ := firstIndex.bounds(secondIndex) 787 size := int(offset2 - offset1) 788 // Crossing a file boundary? 789 if secondIndex.filenum != firstIndex.filenum { 790 // If we have unread data in the first file, we need to do that read now. 791 if unreadSize > 0 { 792 if err := readData(firstIndex.filenum, readStart, unreadSize); err != nil { 793 return nil, nil, err 794 } 795 unreadSize = 0 796 } 797 readStart = 0 798 } 799 if i > 0 && uint64(totalSize+size) > maxBytes { 800 // About to break out due to byte limit being exceeded. We don't 801 // read this last item, but we need to do the deferred reads now. 
802 if unreadSize > 0 { 803 if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil { 804 return nil, nil, err 805 } 806 } 807 break 808 } 809 // Defer the read for later 810 unreadSize += size 811 totalSize += size 812 sizes = append(sizes, size) 813 if i == len(indices)-2 || uint64(totalSize) > maxBytes { 814 // Last item, need to do the read now 815 if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil { 816 return nil, nil, err 817 } 818 break 819 } 820 } 821 return output[:outputSize], sizes, nil 822 } 823 824 // has returns an indicator whether the specified number data is still accessible 825 // in the freezer table. 826 func (t *freezerTable) has(number uint64) bool { 827 return atomic.LoadUint64(&t.items) > number && atomic.LoadUint64(&t.itemHidden) <= number 828 } 829 830 // size returns the total data size in the freezer table. 831 func (t *freezerTable) size() (uint64, error) { 832 t.lock.RLock() 833 defer t.lock.RUnlock() 834 835 return t.sizeNolock() 836 } 837 838 // sizeNolock returns the total data size in the freezer table without obtaining 839 // the mutex first. 840 func (t *freezerTable) sizeNolock() (uint64, error) { 841 stat, err := t.index.Stat() 842 if err != nil { 843 return 0, err 844 } 845 total := uint64(t.maxFileSize)*uint64(t.headId-t.tailId) + uint64(t.headBytes) + uint64(stat.Size()) 846 return total, nil 847 } 848 849 // advanceHead should be called when the current head file would outgrow the file limits, 850 // and a new file must be opened. The caller of this method must hold the write-lock 851 // before calling this method. 852 func (t *freezerTable) advanceHead() error { 853 t.lock.Lock() 854 defer t.lock.Unlock() 855 856 // We open the next file in truncated mode -- if this file already 857 // exists, we need to start over from scratch on it. 
858 nextID := t.headId + 1 859 newHead, err := t.openFile(nextID, openFreezerFileTruncated) 860 if err != nil { 861 return err 862 } 863 864 // Close old file, and reopen in RDONLY mode. 865 t.releaseFile(t.headId) 866 t.openFile(t.headId, openFreezerFileForReadOnly) 867 868 // Swap out the current head. 869 t.head = newHead 870 t.headBytes = 0 871 t.headId = nextID 872 return nil 873 } 874 875 // Sync pushes any pending data from memory out to disk. This is an expensive 876 // operation, so use it with care. 877 func (t *freezerTable) Sync() error { 878 if err := t.index.Sync(); err != nil { 879 return err 880 } 881 if err := t.meta.Sync(); err != nil { 882 return err 883 } 884 return t.head.Sync() 885 } 886 887 func (t *freezerTable) dumpIndexStdout(start, stop int64) { 888 t.dumpIndex(os.Stdout, start, stop) 889 } 890 891 func (t *freezerTable) dumpIndexString(start, stop int64) string { 892 var out bytes.Buffer 893 out.WriteString("\n") 894 t.dumpIndex(&out, start, stop) 895 return out.String() 896 } 897 898 func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) { 899 meta, err := readMetadata(t.meta) 900 if err != nil { 901 fmt.Fprintf(w, "Failed to decode freezer table %v\n", err) 902 return 903 } 904 fmt.Fprintf(w, "Version %d deleted %d, hidden %d\n", meta.Version, atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden)) 905 906 buf := make([]byte, indexEntrySize) 907 908 fmt.Fprintf(w, "| number | fileno | offset |\n") 909 fmt.Fprintf(w, "|--------|--------|--------|\n") 910 911 for i := uint64(start); ; i++ { 912 if _, err := t.index.ReadAt(buf, int64((i+1)*indexEntrySize)); err != nil { 913 break 914 } 915 var entry indexEntry 916 entry.unmarshalBinary(buf) 917 fmt.Fprintf(w, "| %03d | %03d | %03d | \n", i, entry.filenum, entry.offset) 918 if stop > 0 && i >= uint64(stop) { 919 break 920 } 921 } 922 fmt.Fprintf(w, "|--------------------------|\n") 923 }