github.com/theQRL/go-zond@v0.2.1/core/rawdb/freezer_table.go (about) 1 // Copyright 2019 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package rawdb 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "errors" 23 "fmt" 24 "io" 25 "os" 26 "path/filepath" 27 "sync" 28 "sync/atomic" 29 30 "github.com/golang/snappy" 31 "github.com/theQRL/go-zond/common" 32 "github.com/theQRL/go-zond/log" 33 "github.com/theQRL/go-zond/metrics" 34 ) 35 36 var ( 37 // errClosed is returned if an operation attempts to read from or write to the 38 // freezer table after it has already been closed. 39 errClosed = errors.New("closed") 40 41 // errOutOfBounds is returned if the item requested is not contained within the 42 // freezer table. 43 errOutOfBounds = errors.New("out of bounds") 44 45 // errNotSupported is returned if the database doesn't support the required operation. 46 errNotSupported = errors.New("this operation is not supported") 47 ) 48 49 // indexEntry contains the number/id of the file that the data resides in, as well as the 50 // offset within the file to the end of the data. 51 // In serialized form, the filenum is stored as uint16. 52 type indexEntry struct { 53 filenum uint32 // stored as uint16 ( 2 bytes ) 54 offset uint32 // stored as uint32 ( 4 bytes ) 55 } 56 57 const indexEntrySize = 6 58 59 // unmarshalBinary deserializes binary b into the rawIndex entry. 60 func (i *indexEntry) unmarshalBinary(b []byte) { 61 i.filenum = uint32(binary.BigEndian.Uint16(b[:2])) 62 i.offset = binary.BigEndian.Uint32(b[2:6]) 63 } 64 65 // append adds the encoded entry to the end of b. 66 func (i *indexEntry) append(b []byte) []byte { 67 offset := len(b) 68 out := append(b, make([]byte, indexEntrySize)...) 69 binary.BigEndian.PutUint16(out[offset:], uint16(i.filenum)) 70 binary.BigEndian.PutUint32(out[offset+2:], i.offset) 71 return out 72 } 73 74 // bounds returns the start- and end- offsets, and the file number of where to 75 // read there data item marked by the two index entries. The two entries are 76 // assumed to be sequential. 77 func (i *indexEntry) bounds(end *indexEntry) (startOffset, endOffset, fileId uint32) { 78 if i.filenum != end.filenum { 79 // If a piece of data 'crosses' a data-file, 80 // it's actually in one piece on the second data-file. 81 // We return a zero-indexEntry for the second file as start 82 return 0, end.offset, end.filenum 83 } 84 return i.offset, end.offset, end.filenum 85 } 86 87 // freezerTable represents a single chained data table within the freezer (e.g. blocks). 88 // It consists of a data file (snappy encoded arbitrary data blobs) and an indexEntry 89 // file (uncompressed 64 bit indices into the data file). 90 type freezerTable struct { 91 items atomic.Uint64 // Number of items stored in the table (including items removed from tail) 92 itemOffset atomic.Uint64 // Number of items removed from the table 93 94 // itemHidden is the number of items marked as deleted. Tail deletion is 95 // only supported at file level which means the actual deletion will be 96 // delayed until the entire data file is marked as deleted. Before that 97 // these items will be hidden to prevent being visited again. The value 98 // should never be lower than itemOffset. 99 itemHidden atomic.Uint64 100 101 noCompression bool // if true, disables snappy compression. Note: does not work retroactively 102 readonly bool 103 maxFileSize uint32 // Max file size for data-files 104 name string 105 path string 106 107 head *os.File // File descriptor for the data head of the table 108 index *os.File // File descriptor for the indexEntry file of the table 109 meta *os.File // File descriptor for metadata of the table 110 files map[uint32]*os.File // open files 111 headId uint32 // number of the currently active head file 112 tailId uint32 // number of the earliest file 113 114 headBytes int64 // Number of bytes written to the head file 115 readMeter metrics.Meter // Meter for measuring the effective amount of data read 116 writeMeter metrics.Meter // Meter for measuring the effective amount of data written 117 sizeGauge metrics.Gauge // Gauge for tracking the combined size of all freezer tables 118 119 logger log.Logger // Logger with database path and table name embedded 120 lock sync.RWMutex // Mutex protecting the data file descriptors 121 } 122 123 // newFreezerTable opens the given path as a freezer table. 124 func newFreezerTable(path, name string, disableSnappy, readonly bool) (*freezerTable, error) { 125 return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy, readonly) 126 } 127 128 // newTable opens a freezer table, creating the data and index files if they are 129 // non-existent. Both files are truncated to the shortest common length to ensure 130 // they don't go out of sync. 131 func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression, readonly bool) (*freezerTable, error) { 132 // Ensure the containing directory exists and open the indexEntry file 133 if err := os.MkdirAll(path, 0755); err != nil { 134 return nil, err 135 } 136 var idxName string 137 if noCompression { 138 idxName = fmt.Sprintf("%s.ridx", name) // raw index file 139 } else { 140 idxName = fmt.Sprintf("%s.cidx", name) // compressed index file 141 } 142 var ( 143 err error 144 index *os.File 145 meta *os.File 146 ) 147 if readonly { 148 // Will fail if table index file or meta file is not existent 149 index, err = openFreezerFileForReadOnly(filepath.Join(path, idxName)) 150 if err != nil { 151 return nil, err 152 } 153 meta, err = openFreezerFileForReadOnly(filepath.Join(path, fmt.Sprintf("%s.meta", name))) 154 if err != nil { 155 return nil, err 156 } 157 } else { 158 index, err = openFreezerFileForAppend(filepath.Join(path, idxName)) 159 if err != nil { 160 return nil, err 161 } 162 meta, err = openFreezerFileForAppend(filepath.Join(path, fmt.Sprintf("%s.meta", name))) 163 if err != nil { 164 return nil, err 165 } 166 } 167 // Create the table and repair any past inconsistency 168 tab := &freezerTable{ 169 index: index, 170 meta: meta, 171 files: make(map[uint32]*os.File), 172 readMeter: readMeter, 173 writeMeter: writeMeter, 174 sizeGauge: sizeGauge, 175 name: name, 176 path: path, 177 logger: log.New("database", path, "table", name), 178 noCompression: noCompression, 179 readonly: readonly, 180 maxFileSize: maxFilesize, 181 } 182 if err := tab.repair(); err != nil { 183 tab.Close() 184 return nil, err 185 } 186 // Initialize the starting size counter 187 size, err := tab.sizeNolock() 188 if err != nil { 189 tab.Close() 190 return nil, err 191 } 192 tab.sizeGauge.Inc(int64(size)) 193 194 return tab, nil 195 } 196 197 // repair cross-checks the head and the index file and truncates them to 198 // be in sync with each other after a potential crash / data loss. 199 func (t *freezerTable) repair() error { 200 // Create a temporary offset buffer to init files with and read indexEntry into 201 buffer := make([]byte, indexEntrySize) 202 203 // If we've just created the files, initialize the index with the 0 indexEntry 204 stat, err := t.index.Stat() 205 if err != nil { 206 return err 207 } 208 if stat.Size() == 0 { 209 if _, err := t.index.Write(buffer); err != nil { 210 return err 211 } 212 } 213 // Ensure the index is a multiple of indexEntrySize bytes 214 if overflow := stat.Size() % indexEntrySize; overflow != 0 { 215 truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path 216 } 217 // Retrieve the file sizes and prepare for truncation 218 if stat, err = t.index.Stat(); err != nil { 219 return err 220 } 221 offsetsSize := stat.Size() 222 223 // Open the head file 224 var ( 225 firstIndex indexEntry 226 lastIndex indexEntry 227 contentSize int64 228 contentExp int64 229 verbose bool 230 ) 231 // Read index zero, determine what file is the earliest 232 // and what item offset to use 233 t.index.ReadAt(buffer, 0) 234 firstIndex.unmarshalBinary(buffer) 235 236 // Assign the tail fields with the first stored index. 237 // The total removed items is represented with an uint32, 238 // which is not enough in theory but enough in practice. 239 // TODO: use uint64 to represent total removed items. 240 t.tailId = firstIndex.filenum 241 t.itemOffset.Store(uint64(firstIndex.offset)) 242 243 // Load metadata from the file 244 meta, err := loadMetadata(t.meta, t.itemOffset.Load()) 245 if err != nil { 246 return err 247 } 248 t.itemHidden.Store(meta.VirtualTail) 249 250 // Read the last index, use the default value in case the freezer is empty 251 if offsetsSize == indexEntrySize { 252 lastIndex = indexEntry{filenum: t.tailId, offset: 0} 253 } else { 254 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 255 lastIndex.unmarshalBinary(buffer) 256 } 257 if t.readonly { 258 t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForReadOnly) 259 } else { 260 t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend) 261 } 262 if err != nil { 263 return err 264 } 265 if stat, err = t.head.Stat(); err != nil { 266 return err 267 } 268 contentSize = stat.Size() 269 270 // Keep truncating both files until they come in sync 271 contentExp = int64(lastIndex.offset) 272 for contentExp != contentSize { 273 verbose = true 274 // Truncate the head file to the last offset pointer 275 if contentExp < contentSize { 276 t.logger.Warn("Truncating dangling head", "indexed", contentExp, "stored", contentSize) 277 if err := truncateFreezerFile(t.head, contentExp); err != nil { 278 return err 279 } 280 contentSize = contentExp 281 } 282 // Truncate the index to point within the head file 283 if contentExp > contentSize { 284 t.logger.Warn("Truncating dangling indexes", "indexes", offsetsSize/indexEntrySize, "indexed", contentExp, "stored", contentSize) 285 if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil { 286 return err 287 } 288 offsetsSize -= indexEntrySize 289 290 // Read the new head index, use the default value in case 291 // the freezer is already empty. 292 var newLastIndex indexEntry 293 if offsetsSize == indexEntrySize { 294 newLastIndex = indexEntry{filenum: t.tailId, offset: 0} 295 } else { 296 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 297 newLastIndex.unmarshalBinary(buffer) 298 } 299 // We might have slipped back into an earlier head-file here 300 if newLastIndex.filenum != lastIndex.filenum { 301 // Release earlier opened file 302 t.releaseFile(lastIndex.filenum) 303 if t.head, err = t.openFile(newLastIndex.filenum, openFreezerFileForAppend); err != nil { 304 return err 305 } 306 if stat, err = t.head.Stat(); err != nil { 307 // TODO, anything more we can do here? 308 // A data file has gone missing... 309 return err 310 } 311 contentSize = stat.Size() 312 } 313 lastIndex = newLastIndex 314 contentExp = int64(lastIndex.offset) 315 } 316 } 317 // Sync() fails for read-only files on windows. 318 if !t.readonly { 319 // Ensure all reparation changes have been written to disk 320 if err := t.index.Sync(); err != nil { 321 return err 322 } 323 if err := t.head.Sync(); err != nil { 324 return err 325 } 326 if err := t.meta.Sync(); err != nil { 327 return err 328 } 329 } 330 // Update the item and byte counters and return 331 t.items.Store(t.itemOffset.Load() + uint64(offsetsSize/indexEntrySize-1)) // last indexEntry points to the end of the data file 332 t.headBytes = contentSize 333 t.headId = lastIndex.filenum 334 335 // Delete the leftover files because of head deletion 336 t.releaseFilesAfter(t.headId, true) 337 338 // Delete the leftover files because of tail deletion 339 t.releaseFilesBefore(t.tailId, true) 340 341 // Close opened files and preopen all files 342 if err := t.preopen(); err != nil { 343 return err 344 } 345 if verbose { 346 t.logger.Info("Chain freezer table opened", "items", t.items.Load(), "size", t.headBytes) 347 } else { 348 t.logger.Debug("Chain freezer table opened", "items", t.items.Load(), "size", common.StorageSize(t.headBytes)) 349 } 350 return nil 351 } 352 353 // preopen opens all files that the freezer will need. This method should be called from an init-context, 354 // since it assumes that it doesn't have to bother with locking 355 // The rationale for doing preopen is to not have to do it from within Retrieve, thus not needing to ever 356 // obtain a write-lock within Retrieve. 357 func (t *freezerTable) preopen() (err error) { 358 // The repair might have already opened (some) files 359 t.releaseFilesAfter(0, false) 360 361 // Open all except head in RDONLY 362 for i := t.tailId; i < t.headId; i++ { 363 if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil { 364 return err 365 } 366 } 367 if t.readonly { 368 t.head, err = t.openFile(t.headId, openFreezerFileForReadOnly) 369 } else { 370 // Open head in read/write 371 t.head, err = t.openFile(t.headId, openFreezerFileForAppend) 372 } 373 return err 374 } 375 376 // truncateHead discards any recent data above the provided threshold number. 377 func (t *freezerTable) truncateHead(items uint64) error { 378 t.lock.Lock() 379 defer t.lock.Unlock() 380 381 // Ensure the given truncate target falls in the correct range 382 existing := t.items.Load() 383 if existing <= items { 384 return nil 385 } 386 if items < t.itemHidden.Load() { 387 return errors.New("truncation below tail") 388 } 389 // We need to truncate, save the old size for metrics tracking 390 oldSize, err := t.sizeNolock() 391 if err != nil { 392 return err 393 } 394 // Something's out of sync, truncate the table's offset index 395 log := t.logger.Debug 396 if existing > items+1 { 397 log = t.logger.Warn // Only loud warn if we delete multiple items 398 } 399 log("Truncating freezer table", "items", existing, "limit", items) 400 401 // Truncate the index file first, the tail position is also considered 402 // when calculating the new freezer table length. 403 length := items - t.itemOffset.Load() 404 if err := truncateFreezerFile(t.index, int64(length+1)*indexEntrySize); err != nil { 405 return err 406 } 407 // Calculate the new expected size of the data file and truncate it 408 var expected indexEntry 409 if length == 0 { 410 expected = indexEntry{filenum: t.tailId, offset: 0} 411 } else { 412 buffer := make([]byte, indexEntrySize) 413 if _, err := t.index.ReadAt(buffer, int64(length*indexEntrySize)); err != nil { 414 return err 415 } 416 expected.unmarshalBinary(buffer) 417 } 418 // We might need to truncate back to older files 419 if expected.filenum != t.headId { 420 // If already open for reading, force-reopen for writing 421 t.releaseFile(expected.filenum) 422 newHead, err := t.openFile(expected.filenum, openFreezerFileForAppend) 423 if err != nil { 424 return err 425 } 426 // Release any files _after the current head -- both the previous head 427 // and any files which may have been opened for reading 428 t.releaseFilesAfter(expected.filenum, true) 429 // Set back the historic head 430 t.head = newHead 431 t.headId = expected.filenum 432 } 433 if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil { 434 return err 435 } 436 // All data files truncated, set internal counters and return 437 t.headBytes = int64(expected.offset) 438 t.items.Store(items) 439 440 // Retrieve the new size and update the total size counter 441 newSize, err := t.sizeNolock() 442 if err != nil { 443 return err 444 } 445 t.sizeGauge.Dec(int64(oldSize - newSize)) 446 return nil 447 } 448 449 // truncateTail discards any recent data before the provided threshold number. 450 func (t *freezerTable) truncateTail(items uint64) error { 451 t.lock.Lock() 452 defer t.lock.Unlock() 453 454 // Ensure the given truncate target falls in the correct range 455 if t.itemHidden.Load() >= items { 456 return nil 457 } 458 if t.items.Load() < items { 459 return errors.New("truncation above head") 460 } 461 // Load the new tail index by the given new tail position 462 var ( 463 newTailId uint32 464 buffer = make([]byte, indexEntrySize) 465 ) 466 if t.items.Load() == items { 467 newTailId = t.headId 468 } else { 469 offset := items - t.itemOffset.Load() 470 if _, err := t.index.ReadAt(buffer, int64((offset+1)*indexEntrySize)); err != nil { 471 return err 472 } 473 var newTail indexEntry 474 newTail.unmarshalBinary(buffer) 475 newTailId = newTail.filenum 476 } 477 // Update the virtual tail marker and hidden these entries in table. 478 t.itemHidden.Store(items) 479 if err := writeMetadata(t.meta, newMetadata(items)); err != nil { 480 return err 481 } 482 // Hidden items still fall in the current tail file, no data file 483 // can be dropped. 484 if t.tailId == newTailId { 485 return nil 486 } 487 // Hidden items fall in the incorrect range, returns the error. 488 if t.tailId > newTailId { 489 return fmt.Errorf("invalid index, tail-file %d, item-file %d", t.tailId, newTailId) 490 } 491 // Hidden items exceed the current tail file, drop the relevant 492 // data files. We need to truncate, save the old size for metrics 493 // tracking. 494 oldSize, err := t.sizeNolock() 495 if err != nil { 496 return err 497 } 498 // Count how many items can be deleted from the file. 499 var ( 500 newDeleted = items 501 deleted = t.itemOffset.Load() 502 ) 503 for current := items - 1; current >= deleted; current -= 1 { 504 if _, err := t.index.ReadAt(buffer, int64((current-deleted+1)*indexEntrySize)); err != nil { 505 return err 506 } 507 var pre indexEntry 508 pre.unmarshalBinary(buffer) 509 if pre.filenum != newTailId { 510 break 511 } 512 newDeleted = current 513 } 514 // Commit the changes of metadata file first before manipulating 515 // the indexes file. 516 if err := t.meta.Sync(); err != nil { 517 return err 518 } 519 // Truncate the deleted index entries from the index file. 520 err = copyFrom(t.index.Name(), t.index.Name(), indexEntrySize*(newDeleted-deleted+1), func(f *os.File) error { 521 tailIndex := indexEntry{ 522 filenum: newTailId, 523 offset: uint32(newDeleted), 524 } 525 _, err := f.Write(tailIndex.append(nil)) 526 return err 527 }) 528 if err != nil { 529 return err 530 } 531 // Reopen the modified index file to load the changes 532 if err := t.index.Close(); err != nil { 533 return err 534 } 535 t.index, err = openFreezerFileForAppend(t.index.Name()) 536 if err != nil { 537 return err 538 } 539 // Release any files before the current tail 540 t.tailId = newTailId 541 t.itemOffset.Store(newDeleted) 542 t.releaseFilesBefore(t.tailId, true) 543 544 // Retrieve the new size and update the total size counter 545 newSize, err := t.sizeNolock() 546 if err != nil { 547 return err 548 } 549 t.sizeGauge.Dec(int64(oldSize - newSize)) 550 return nil 551 } 552 553 // Close closes all opened files. 554 func (t *freezerTable) Close() error { 555 t.lock.Lock() 556 defer t.lock.Unlock() 557 558 var errs []error 559 doClose := func(f *os.File, sync bool, close bool) { 560 if sync && !t.readonly { 561 if err := f.Sync(); err != nil { 562 errs = append(errs, err) 563 } 564 } 565 if close { 566 if err := f.Close(); err != nil { 567 errs = append(errs, err) 568 } 569 } 570 } 571 // Trying to fsync a file opened in rdonly causes "Access denied" 572 // error on Windows. 573 doClose(t.index, true, true) 574 doClose(t.meta, true, true) 575 // The preopened non-head data-files are all opened in readonly. 576 // The head is opened in rw-mode, so we sync it here - but since it's also 577 // part of t.files, it will be closed in the loop below. 578 doClose(t.head, true, false) // sync but do not close 579 for _, f := range t.files { 580 doClose(f, false, true) // close but do not sync 581 } 582 t.index = nil 583 t.meta = nil 584 t.head = nil 585 586 if errs != nil { 587 return fmt.Errorf("%v", errs) 588 } 589 return nil 590 } 591 592 // openFile assumes that the write-lock is held by the caller 593 func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) { 594 var exist bool 595 if f, exist = t.files[num]; !exist { 596 var name string 597 if t.noCompression { 598 name = fmt.Sprintf("%s.%04d.rdat", t.name, num) 599 } else { 600 name = fmt.Sprintf("%s.%04d.cdat", t.name, num) 601 } 602 f, err = opener(filepath.Join(t.path, name)) 603 if err != nil { 604 return nil, err 605 } 606 t.files[num] = f 607 } 608 return f, err 609 } 610 611 // releaseFile closes a file, and removes it from the open file cache. 612 // Assumes that the caller holds the write lock 613 func (t *freezerTable) releaseFile(num uint32) { 614 if f, exist := t.files[num]; exist { 615 delete(t.files, num) 616 f.Close() 617 } 618 } 619 620 // releaseFilesAfter closes all open files with a higher number, and optionally also deletes the files 621 func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) { 622 for fnum, f := range t.files { 623 if fnum > num { 624 delete(t.files, fnum) 625 f.Close() 626 if remove { 627 os.Remove(f.Name()) 628 } 629 } 630 } 631 } 632 633 // releaseFilesBefore closes all open files with a lower number, and optionally also deletes the files 634 func (t *freezerTable) releaseFilesBefore(num uint32, remove bool) { 635 for fnum, f := range t.files { 636 if fnum < num { 637 delete(t.files, fnum) 638 f.Close() 639 if remove { 640 os.Remove(f.Name()) 641 } 642 } 643 } 644 } 645 646 // getIndices returns the index entries for the given from-item, covering 'count' items. 647 // N.B: The actual number of returned indices for N items will always be N+1 (unless an 648 // error is returned). 649 // OBS: This method assumes that the caller has already verified (and/or trimmed) the range 650 // so that the items are within bounds. If this method is used to read out of bounds, 651 // it will return error. 652 func (t *freezerTable) getIndices(from, count uint64) ([]*indexEntry, error) { 653 // Apply the table-offset 654 from = from - t.itemOffset.Load() 655 // For reading N items, we need N+1 indices. 656 buffer := make([]byte, (count+1)*indexEntrySize) 657 if _, err := t.index.ReadAt(buffer, int64(from*indexEntrySize)); err != nil { 658 return nil, err 659 } 660 var ( 661 indices []*indexEntry 662 offset int 663 ) 664 for i := from; i <= from+count; i++ { 665 index := new(indexEntry) 666 index.unmarshalBinary(buffer[offset:]) 667 offset += indexEntrySize 668 indices = append(indices, index) 669 } 670 if from == 0 { 671 // Special case if we're reading the first item in the freezer. We assume that 672 // the first item always start from zero(regarding the deletion, we 673 // only support deletion by files, so that the assumption is held). 674 // This means we can use the first item metadata to carry information about 675 // the 'global' offset, for the deletion-case 676 indices[0].offset = 0 677 indices[0].filenum = indices[1].filenum 678 } 679 return indices, nil 680 } 681 682 // Retrieve looks up the data offset of an item with the given number and retrieves 683 // the raw binary blob from the data file. 684 func (t *freezerTable) Retrieve(item uint64) ([]byte, error) { 685 items, err := t.RetrieveItems(item, 1, 0) 686 if err != nil { 687 return nil, err 688 } 689 return items[0], nil 690 } 691 692 // RetrieveItems returns multiple items in sequence, starting from the index 'start'. 693 // It will return at most 'max' items, but will abort earlier to respect the 694 // 'maxBytes' argument. However, if the 'maxBytes' is smaller than the size of one 695 // item, it _will_ return one element and possibly overflow the maxBytes. 696 func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, error) { 697 // First we read the 'raw' data, which might be compressed. 698 diskData, sizes, err := t.retrieveItems(start, count, maxBytes) 699 if err != nil { 700 return nil, err 701 } 702 var ( 703 output = make([][]byte, 0, count) 704 offset int // offset for reading 705 outputSize int // size of uncompressed data 706 ) 707 // Now slice up the data and decompress. 708 for i, diskSize := range sizes { 709 item := diskData[offset : offset+diskSize] 710 offset += diskSize 711 decompressedSize := diskSize 712 if !t.noCompression { 713 decompressedSize, _ = snappy.DecodedLen(item) 714 } 715 if i > 0 && maxBytes != 0 && uint64(outputSize+decompressedSize) > maxBytes { 716 break 717 } 718 if !t.noCompression { 719 data, err := snappy.Decode(nil, item) 720 if err != nil { 721 return nil, err 722 } 723 output = append(output, data) 724 } else { 725 output = append(output, item) 726 } 727 outputSize += decompressedSize 728 } 729 return output, nil 730 } 731 732 // retrieveItems reads up to 'count' items from the table. It reads at least 733 // one item, but otherwise avoids reading more than maxBytes bytes. Freezer 734 // will ignore the size limitation and continuously allocate memory to store 735 // data if maxBytes is 0. It returns the (potentially compressed) data, and 736 // the sizes. 737 func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []int, error) { 738 t.lock.RLock() 739 defer t.lock.RUnlock() 740 741 // Ensure the table and the item are accessible 742 if t.index == nil || t.head == nil || t.meta == nil { 743 return nil, nil, errClosed 744 } 745 var ( 746 items = t.items.Load() // the total items(head + 1) 747 hidden = t.itemHidden.Load() // the number of hidden items 748 ) 749 // Ensure the start is written, not deleted from the tail, and that the 750 // caller actually wants something 751 if items <= start || hidden > start || count == 0 { 752 return nil, nil, errOutOfBounds 753 } 754 if start+count > items { 755 count = items - start 756 } 757 var output []byte // Buffer to read data into 758 if maxBytes != 0 { 759 output = make([]byte, 0, maxBytes) 760 } else { 761 output = make([]byte, 0, 1024) // initial buffer cap 762 } 763 // readData is a helper method to read a single data item from disk. 764 readData := func(fileId, start uint32, length int) error { 765 output = grow(output, length) 766 dataFile, exist := t.files[fileId] 767 if !exist { 768 return fmt.Errorf("missing data file %d", fileId) 769 } 770 if _, err := dataFile.ReadAt(output[len(output)-length:], int64(start)); err != nil { 771 return err 772 } 773 return nil 774 } 775 // Read all the indexes in one go 776 indices, err := t.getIndices(start, count) 777 if err != nil { 778 return nil, nil, err 779 } 780 var ( 781 sizes []int // The sizes for each element 782 totalSize = 0 // The total size of all data read so far 783 readStart = indices[0].offset // Where, in the file, to start reading 784 unreadSize = 0 // The size of the as-yet-unread data 785 ) 786 787 for i, firstIndex := range indices[:len(indices)-1] { 788 secondIndex := indices[i+1] 789 // Determine the size of the item. 790 offset1, offset2, _ := firstIndex.bounds(secondIndex) 791 size := int(offset2 - offset1) 792 // Crossing a file boundary? 793 if secondIndex.filenum != firstIndex.filenum { 794 // If we have unread data in the first file, we need to do that read now. 795 if unreadSize > 0 { 796 if err := readData(firstIndex.filenum, readStart, unreadSize); err != nil { 797 return nil, nil, err 798 } 799 unreadSize = 0 800 } 801 readStart = 0 802 } 803 if i > 0 && uint64(totalSize+size) > maxBytes && maxBytes != 0 { 804 // About to break out due to byte limit being exceeded. We don't 805 // read this last item, but we need to do the deferred reads now. 806 if unreadSize > 0 { 807 if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil { 808 return nil, nil, err 809 } 810 } 811 break 812 } 813 // Defer the read for later 814 unreadSize += size 815 totalSize += size 816 sizes = append(sizes, size) 817 if i == len(indices)-2 || (uint64(totalSize) > maxBytes && maxBytes != 0) { 818 // Last item, need to do the read now 819 if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil { 820 return nil, nil, err 821 } 822 break 823 } 824 } 825 826 // Update metrics. 827 t.readMeter.Mark(int64(totalSize)) 828 return output, sizes, nil 829 } 830 831 // has returns an indicator whether the specified number data is still accessible 832 // in the freezer table. 833 func (t *freezerTable) has(number uint64) bool { 834 return t.items.Load() > number && t.itemHidden.Load() <= number 835 } 836 837 // size returns the total data size in the freezer table. 838 func (t *freezerTable) size() (uint64, error) { 839 t.lock.RLock() 840 defer t.lock.RUnlock() 841 842 return t.sizeNolock() 843 } 844 845 // sizeNolock returns the total data size in the freezer table without obtaining 846 // the mutex first. 847 func (t *freezerTable) sizeNolock() (uint64, error) { 848 stat, err := t.index.Stat() 849 if err != nil { 850 return 0, err 851 } 852 total := uint64(t.maxFileSize)*uint64(t.headId-t.tailId) + uint64(t.headBytes) + uint64(stat.Size()) 853 return total, nil 854 } 855 856 // advanceHead should be called when the current head file would outgrow the file limits, 857 // and a new file must be opened. The caller of this method must hold the write-lock 858 // before calling this method. 859 func (t *freezerTable) advanceHead() error { 860 t.lock.Lock() 861 defer t.lock.Unlock() 862 863 // We open the next file in truncated mode -- if this file already 864 // exists, we need to start over from scratch on it. 865 nextID := t.headId + 1 866 newHead, err := t.openFile(nextID, openFreezerFileTruncated) 867 if err != nil { 868 return err 869 } 870 // Commit the contents of the old file to stable storage and 871 // tear it down. It will be re-opened in read-only mode. 872 if err := t.head.Sync(); err != nil { 873 return err 874 } 875 t.releaseFile(t.headId) 876 t.openFile(t.headId, openFreezerFileForReadOnly) 877 878 // Swap out the current head. 879 t.head = newHead 880 t.headBytes = 0 881 t.headId = nextID 882 return nil 883 } 884 885 // Sync pushes any pending data from memory out to disk. This is an expensive 886 // operation, so use it with care. 887 func (t *freezerTable) Sync() error { 888 t.lock.Lock() 889 defer t.lock.Unlock() 890 if t.index == nil || t.head == nil || t.meta == nil { 891 return errClosed 892 } 893 var err error 894 trackError := func(e error) { 895 if e != nil && err == nil { 896 err = e 897 } 898 } 899 900 trackError(t.index.Sync()) 901 trackError(t.meta.Sync()) 902 trackError(t.head.Sync()) 903 return err 904 } 905 906 func (t *freezerTable) dumpIndexStdout(start, stop int64) { 907 t.dumpIndex(os.Stdout, start, stop) 908 } 909 910 func (t *freezerTable) dumpIndexString(start, stop int64) string { 911 var out bytes.Buffer 912 out.WriteString("\n") 913 t.dumpIndex(&out, start, stop) 914 return out.String() 915 } 916 917 func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) { 918 meta, err := readMetadata(t.meta) 919 if err != nil { 920 fmt.Fprintf(w, "Failed to decode freezer table %v\n", err) 921 return 922 } 923 fmt.Fprintf(w, "Version %d count %d, deleted %d, hidden %d\n", meta.Version, 924 t.items.Load(), t.itemOffset.Load(), t.itemHidden.Load()) 925 926 buf := make([]byte, indexEntrySize) 927 928 fmt.Fprintf(w, "| number | fileno | offset |\n") 929 fmt.Fprintf(w, "|--------|--------|--------|\n") 930 931 for i := uint64(start); ; i++ { 932 if _, err := t.index.ReadAt(buf, int64((i+1)*indexEntrySize)); err != nil { 933 break 934 } 935 var entry indexEntry 936 entry.unmarshalBinary(buf) 937 fmt.Fprintf(w, "| %03d | %03d | %03d | \n", i, entry.filenum, entry.offset) 938 if stop > 0 && i >= uint64(stop) { 939 break 940 } 941 } 942 fmt.Fprintf(w, "|--------------------------|\n") 943 }