github.com/DxChainNetwork/dxc@v0.8.1-0.20220824085222-1162e304b6e7/core/rawdb/freezer_table.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package rawdb

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"

	"github.com/DxChainNetwork/dxc/common"
	"github.com/DxChainNetwork/dxc/log"
	"github.com/DxChainNetwork/dxc/metrics"
	"github.com/golang/snappy"
)

var (
	// errClosed is returned if an operation attempts to read from or write to the
	// freezer table after it has already been closed.
	errClosed = errors.New("closed")

	// errOutOfBounds is returned if the item requested is not contained within the
	// freezer table.
	errOutOfBounds = errors.New("out of bounds")

	// errNotSupported is returned if the database doesn't support the required operation.
	errNotSupported = errors.New("this operation is not supported")
)

// indexEntry contains the number/id of the file that the data resides in, as well
// as the offset within the file to the end of the data.
// In serialized form, the filenum is stored as uint16.
type indexEntry struct {
	filenum uint32 // stored as uint16 (2 bytes)
	offset  uint32 // stored as uint32 (4 bytes)
}

const indexEntrySize = 6

// unmarshalBinary deserializes binary b into the rawIndex entry.
func (i *indexEntry) unmarshalBinary(b []byte) error {
	i.filenum = uint32(binary.BigEndian.Uint16(b[:2]))
	i.offset = binary.BigEndian.Uint32(b[2:6])
	return nil
}

// marshallBinary serializes the rawIndex entry into binary.
func (i *indexEntry) marshallBinary() []byte {
	b := make([]byte, indexEntrySize)
	binary.BigEndian.PutUint16(b[:2], uint16(i.filenum))
	binary.BigEndian.PutUint32(b[2:6], i.offset)
	return b
}

// bounds returns the start and end offsets, and the file number of where to
// read the data item marked by the two index entries. The two entries are
// assumed to be sequential.
func (start *indexEntry) bounds(end *indexEntry) (startOffset, endOffset, fileId uint32) {
	if start.filenum != end.filenum {
		// If a piece of data 'crosses' a data-file,
		// it's actually in one piece on the second data-file.
		// We return a zero-indexEntry for the second file as start
		return 0, end.offset, end.filenum
	}
	return start.offset, end.offset, end.filenum
}
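// Illustrative sketch (not part of the upstream file): a round-trip through the
// 6-byte on-disk encoding of an indexEntry. Since filenum is narrowed to uint16
// on disk, file numbers above 65535 cannot be represented.
func exampleIndexEntryRoundTrip() {
	in := indexEntry{filenum: 3, offset: 1_000_000}
	var out indexEntry
	_ = out.unmarshalBinary(in.marshallBinary())
	fmt.Printf("filenum=%d offset=%d\n", out.filenum, out.offset) // filenum=3 offset=1000000
}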
// freezerTable represents a single chained data table within the freezer (e.g. blocks).
// It consists of a data file (snappy encoded arbitrary data blobs) and an indexEntry
// file (uncompressed 6-byte index entries pointing into the data file).
type freezerTable struct {
	// WARNING: The `items` field is accessed atomically. On 32 bit platforms, only
	// 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned,
	// so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG).
	items uint64 // Number of items stored in the table (including items removed from tail)

	noCompression bool   // if true, disables snappy compression. Note: does not work retroactively
	maxFileSize   uint32 // Max file size for data-files
	name          string
	path          string

	head   *os.File            // File descriptor for the data head of the table
	files  map[uint32]*os.File // open files
	headId uint32              // number of the currently active head file
	tailId uint32              // number of the earliest file
	index  *os.File            // File descriptor for the indexEntry file of the table

	// In the case that old items are deleted (from the tail), we use itemOffset
	// to count how many historic items have gone missing.
	itemOffset uint32 // Offset (number of discarded items)

	headBytes  uint32        // Number of bytes written to the head file
	readMeter  metrics.Meter // Meter for measuring the effective amount of data read
	writeMeter metrics.Meter // Meter for measuring the effective amount of data written
	sizeGauge  metrics.Gauge // Gauge for tracking the combined size of all freezer tables

	logger log.Logger   // Logger with database path and table name embedded
	lock   sync.RWMutex // Mutex protecting the data file descriptors
}

// NewFreezerTable opens the given path as a freezer table.
func NewFreezerTable(path, name string, disableSnappy bool) (*freezerTable, error) {
	return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, disableSnappy)
}

// newTable opens a freezer table with default settings - 2G files
func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, disableSnappy bool) (*freezerTable, error) {
	return newCustomTable(path, name, readMeter, writeMeter, sizeGauge, 2*1000*1000*1000, disableSnappy)
}

// openFreezerFileForAppend opens a freezer table file and seeks to the end
func openFreezerFileForAppend(filename string) (*os.File, error) {
	// Open the file without the O_APPEND flag
	// because it has differing behaviour during Truncate operations
	// on different OS's
	file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		return nil, err
	}
	// Seek to end for append
	if _, err = file.Seek(0, io.SeekEnd); err != nil {
		return nil, err
	}
	return file, nil
}

// openFreezerFileForReadOnly opens a freezer table file for read only access
func openFreezerFileForReadOnly(filename string) (*os.File, error) {
	return os.OpenFile(filename, os.O_RDONLY, 0644)
}

// openFreezerFileTruncated opens a freezer table making sure it is truncated
func openFreezerFileTruncated(filename string) (*os.File, error) {
	return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
}

// truncateFreezerFile resizes a freezer table file and seeks to the end
func truncateFreezerFile(file *os.File, size int64) error {
	if err := file.Truncate(size); err != nil {
		return err
	}
	// Seek to end for append
	if _, err := file.Seek(0, io.SeekEnd); err != nil {
		return err
	}
	return nil
}
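// Minimal usage sketch (illustrative; the table name and scratch directory are
// arbitrary): open a table, append one item and read it back. Items must be
// appended strictly in sequence, starting at number 0.
func exampleFreezerTableUsage() error {
	dir, err := os.MkdirTemp("", "freezer-example")
	if err != nil {
		return err
	}
	defer os.RemoveAll(dir)

	table, err := NewFreezerTable(dir, "headers", false) // false = snappy-compressed
	if err != nil {
		return err
	}
	defer table.Close()

	if err := table.Append(0, []byte("hello freezer")); err != nil {
		return err
	}
	blob, err := table.Retrieve(0)
	if err != nil {
		return err
	}
	fmt.Printf("item 0: %s\n", blob)
	return table.Sync()
}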
// newCustomTable opens a freezer table, creating the data and index files if they are
// non-existent. Both files are truncated to the shortest common length to ensure
// they don't go out of sync.
func newCustomTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression bool) (*freezerTable, error) {
	// Ensure the containing directory exists and open the indexEntry file
	if err := os.MkdirAll(path, 0755); err != nil {
		return nil, err
	}
	var idxName string
	if noCompression {
		// Raw idx
		idxName = fmt.Sprintf("%s.ridx", name)
	} else {
		// Compressed idx
		idxName = fmt.Sprintf("%s.cidx", name)
	}
	offsets, err := openFreezerFileForAppend(filepath.Join(path, idxName))
	if err != nil {
		return nil, err
	}
	// Create the table and repair any past inconsistency
	tab := &freezerTable{
		index:         offsets,
		files:         make(map[uint32]*os.File),
		readMeter:     readMeter,
		writeMeter:    writeMeter,
		sizeGauge:     sizeGauge,
		name:          name,
		path:          path,
		logger:        log.New("database", path, "table", name),
		noCompression: noCompression,
		maxFileSize:   maxFilesize,
	}
	if err := tab.repair(); err != nil {
		tab.Close()
		return nil, err
	}
	// Initialize the starting size counter
	size, err := tab.sizeNolock()
	if err != nil {
		tab.Close()
		return nil, err
	}
	tab.sizeGauge.Inc(int64(size))

	return tab, nil
}
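// Illustrative helper showing the on-disk naming scheme used by newCustomTable
// (for the index file) and openFile (for the data files): compressed tables use
// the .cidx/.cdat suffixes, raw tables use .ridx/.rdat.
func exampleTableFileNames(name string, filenum uint32, noCompression bool) (idxName, datName string) {
	if noCompression {
		return fmt.Sprintf("%s.ridx", name), fmt.Sprintf("%s.%04d.rdat", name, filenum)
	}
	return fmt.Sprintf("%s.cidx", name), fmt.Sprintf("%s.%04d.cdat", name, filenum)
}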
// repair cross-checks the head and the index file and truncates them to
// be in sync with each other after a potential crash / data loss.
func (t *freezerTable) repair() error {
	// Create a temporary offset buffer to init files with and read indexEntry into
	buffer := make([]byte, indexEntrySize)

	// If we've just created the files, initialize the index with the 0 indexEntry
	stat, err := t.index.Stat()
	if err != nil {
		return err
	}
	if stat.Size() == 0 {
		if _, err := t.index.Write(buffer); err != nil {
			return err
		}
	}
	// Ensure the index is a multiple of indexEntrySize bytes
	if overflow := stat.Size() % indexEntrySize; overflow != 0 {
		truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path
	}
	// Retrieve the file sizes and prepare for truncation
	if stat, err = t.index.Stat(); err != nil {
		return err
	}
	offsetsSize := stat.Size()

	// Open the head file
	var (
		firstIndex  indexEntry
		lastIndex   indexEntry
		contentSize int64
		contentExp  int64
	)
	// Read index zero, determine what file is the earliest
	// and what item offset to use
	t.index.ReadAt(buffer, 0)
	firstIndex.unmarshalBinary(buffer)

	t.tailId = firstIndex.filenum
	t.itemOffset = firstIndex.offset

	t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
	lastIndex.unmarshalBinary(buffer)
	t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend)
	if err != nil {
		return err
	}
	if stat, err = t.head.Stat(); err != nil {
		return err
	}
	contentSize = stat.Size()

	// Keep truncating both files until they come in sync
	contentExp = int64(lastIndex.offset)

	for contentExp != contentSize {
		// Truncate the head file to the last offset pointer
		if contentExp < contentSize {
			t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize))
			if err := truncateFreezerFile(t.head, contentExp); err != nil {
				return err
			}
			contentSize = contentExp
		}
		// Truncate the index to point within the head file
		if contentExp > contentSize {
			t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize))
			if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil {
				return err
			}
			offsetsSize -= indexEntrySize
			t.index.ReadAt(buffer, offsetsSize-indexEntrySize)
			var newLastIndex indexEntry
			newLastIndex.unmarshalBinary(buffer)
			// We might have slipped back into an earlier head-file here
			if newLastIndex.filenum != lastIndex.filenum {
				// Release earlier opened file
				t.releaseFile(lastIndex.filenum)
				if t.head, err = t.openFile(newLastIndex.filenum, openFreezerFileForAppend); err != nil {
					return err
				}
				if stat, err = t.head.Stat(); err != nil {
					// TODO, anything more we can do here?
					// A data file has gone missing...
					return err
				}
				contentSize = stat.Size()
			}
			lastIndex = newLastIndex
			contentExp = int64(lastIndex.offset)
		}
	}
	// Ensure all repair changes have been written to disk
	if err := t.index.Sync(); err != nil {
		return err
	}
	if err := t.head.Sync(); err != nil {
		return err
	}
	// Update the item and byte counters and return
	t.items = uint64(t.itemOffset) + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file
	t.headBytes = uint32(contentSize)
	t.headId = lastIndex.filenum

	// Close opened files and preopen all files
	if err := t.preopen(); err != nil {
		return err
	}
	t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes))
	return nil
}

// preopen opens all files that the freezer will need. This method should be called from an init-context,
// since it assumes that it doesn't have to bother with locking.
// The rationale for doing preopen is to not have to do it from within Retrieve, thus not needing to ever
// obtain a write-lock within Retrieve.
func (t *freezerTable) preopen() (err error) {
	// The repair might have already opened (some) files
	t.releaseFilesAfter(0, false)
	// Open all except head in RDONLY
	for i := t.tailId; i < t.headId; i++ {
		if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil {
			return err
		}
	}
	// Open head in read/write
	t.head, err = t.openFile(t.headId, openFreezerFileForAppend)
	return err
}
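// Illustrative arithmetic mirroring how repair derives the item count: the
// index always holds one more entry than there are items (entry N marks the
// end of item N-1), and itemOffset accounts for items deleted from the tail.
func exampleRepairedItemCount(itemOffset uint32, indexFileSize int64) uint64 {
	return uint64(itemOffset) + uint64(indexFileSize/indexEntrySize-1)
}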
// truncate discards any recent data above the provided threshold number.
func (t *freezerTable) truncate(items uint64) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	// If our item count is correct, don't do anything
	existing := atomic.LoadUint64(&t.items)
	if existing <= items {
		return nil
	}
	// We need to truncate, save the old size for metrics tracking
	oldSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	// Something's out of sync, truncate the table's offset index
	log := t.logger.Debug
	if existing > items+1 {
		log = t.logger.Warn // Only loudly warn when deleting multiple items
	}
	log("Truncating freezer table", "items", existing, "limit", items)
	if err := truncateFreezerFile(t.index, int64(items+1)*indexEntrySize); err != nil {
		return err
	}
	// Calculate the new expected size of the data file and truncate it
	buffer := make([]byte, indexEntrySize)
	if _, err := t.index.ReadAt(buffer, int64(items*indexEntrySize)); err != nil {
		return err
	}
	var expected indexEntry
	expected.unmarshalBinary(buffer)

	// We might need to truncate back to older files
	if expected.filenum != t.headId {
		// If already open for reading, force-reopen for writing
		t.releaseFile(expected.filenum)
		newHead, err := t.openFile(expected.filenum, openFreezerFileForAppend)
		if err != nil {
			return err
		}
		// Release any files _after the current head -- both the previous head
		// and any files which may have been opened for reading
		t.releaseFilesAfter(expected.filenum, true)
		// Set back the historic head
		t.head = newHead
		atomic.StoreUint32(&t.headId, expected.filenum)
	}
	if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil {
		return err
	}
	// All data files truncated, set internal counters and return
	atomic.StoreUint64(&t.items, items)
	atomic.StoreUint32(&t.headBytes, expected.offset)

	// Retrieve the new size and update the total size counter
	newSize, err := t.sizeNolock()
	if err != nil {
		return err
	}
	t.sizeGauge.Dec(int64(oldSize - newSize))

	return nil
}

// Close closes all opened files.
func (t *freezerTable) Close() error {
	t.lock.Lock()
	defer t.lock.Unlock()

	var errs []error
	if err := t.index.Close(); err != nil {
		errs = append(errs, err)
	}
	t.index = nil

	for _, f := range t.files {
		if err := f.Close(); err != nil {
			errs = append(errs, err)
		}
	}
	t.head = nil

	if errs != nil {
		return fmt.Errorf("%v", errs)
	}
	return nil
}

// openFile assumes that the write-lock is held by the caller
func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) {
	var exist bool
	if f, exist = t.files[num]; !exist {
		var name string
		if t.noCompression {
			name = fmt.Sprintf("%s.%04d.rdat", t.name, num)
		} else {
			name = fmt.Sprintf("%s.%04d.cdat", t.name, num)
		}
		f, err = opener(filepath.Join(t.path, name))
		if err != nil {
			return nil, err
		}
		t.files[num] = f
	}
	return f, err
}
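// Illustrative arithmetic mirroring the index truncation in truncate: keeping
// 'items' items means keeping items+1 index entries, because of the extra
// leading entry that marks the start of the data.
func exampleTruncatedIndexSize(items uint64) int64 {
	return int64(items+1) * indexEntrySize
}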
// releaseFile closes a file, and removes it from the open file cache.
// Assumes that the caller holds the write lock.
func (t *freezerTable) releaseFile(num uint32) {
	if f, exist := t.files[num]; exist {
		delete(t.files, num)
		f.Close()
	}
}

// releaseFilesAfter closes all open files with a higher number, and optionally also deletes the files
func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) {
	for fnum, f := range t.files {
		if fnum > num {
			delete(t.files, fnum)
			f.Close()
			if remove {
				os.Remove(f.Name())
			}
		}
	}
}

// Append injects a binary blob at the end of the freezer table. The item number
// is a precautionary parameter to ensure data correctness, but the table will
// reject already existing data.
//
// Note, this method will *not* flush any data to disk, so be sure to explicitly
// fsync before irreversibly deleting data from the database.
func (t *freezerTable) Append(item uint64, blob []byte) error {
	// Encode the blob before the lock portion
	if !t.noCompression {
		blob = snappy.Encode(nil, blob)
	}
	// Read lock prevents competition with truncate
	retry, err := t.append(item, blob, false)
	if err != nil {
		return err
	}
	if retry {
		// Read lock was insufficient, retry with a writelock
		_, err = t.append(item, blob, true)
	}
	return err
}
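// Illustrative sketch (assumes a freshly created, empty table): Append enforces
// strictly sequential item numbers, so an out-of-order write fails fast instead
// of silently corrupting the index.
func exampleSequentialAppend(t *freezerTable) {
	_ = t.Append(0, []byte("first")) // accepted: item 0 is the next expected item
	err := t.Append(2, []byte("third"))
	fmt.Println(err) // appending unexpected item: want 1, have 2
}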
// append injects a binary blob at the end of the freezer table.
// Normally, inserts do not require holding the write-lock, so it should be invoked with 'wlock' set to
// false.
// However, if the data would grow the current file out of bounds, then this
// method will return 'true, nil', indicating that the caller should retry, this time
// with 'wlock' set to true.
func (t *freezerTable) append(item uint64, encodedBlob []byte, wlock bool) (bool, error) {
	if wlock {
		t.lock.Lock()
		defer t.lock.Unlock()
	} else {
		t.lock.RLock()
		defer t.lock.RUnlock()
	}
	// Ensure the table is still accessible
	if t.index == nil || t.head == nil {
		return false, errClosed
	}
	// Ensure only the next item can be written, nothing else
	if atomic.LoadUint64(&t.items) != item {
		return false, fmt.Errorf("appending unexpected item: want %d, have %d", t.items, item)
	}
	bLen := uint32(len(encodedBlob))
	if t.headBytes+bLen < bLen ||
		t.headBytes+bLen > t.maxFileSize {
		// Writing would overflow, so we need to open a new data file.
		// If we don't already hold the writelock, abort and let the caller
		// invoke this method a second time.
		if !wlock {
			return true, nil
		}
		nextID := atomic.LoadUint32(&t.headId) + 1
		// We open the next file in truncated mode -- if this file already
		// exists, we need to start over from scratch on it
		newHead, err := t.openFile(nextID, openFreezerFileTruncated)
		if err != nil {
			return false, err
		}
		// Close old file, and reopen in RDONLY mode
		t.releaseFile(t.headId)
		t.openFile(t.headId, openFreezerFileForReadOnly)

		// Swap out the current head
		t.head = newHead
		atomic.StoreUint32(&t.headBytes, 0)
		atomic.StoreUint32(&t.headId, nextID)
	}
	if _, err := t.head.Write(encodedBlob); err != nil {
		return false, err
	}
	newOffset := atomic.AddUint32(&t.headBytes, bLen)
	idx := indexEntry{
		filenum: atomic.LoadUint32(&t.headId),
		offset:  newOffset,
	}
	// Write indexEntry
	t.index.Write(idx.marshallBinary())

	t.writeMeter.Mark(int64(bLen + indexEntrySize))
	t.sizeGauge.Inc(int64(bLen + indexEntrySize))

	atomic.AddUint64(&t.items, 1)
	return false, nil
}

// getIndices returns the index entries for the given from-item, covering 'count' items.
// N.B: The actual number of returned indices for N items will always be N+1 (unless an
// error is returned).
// OBS: This method assumes that the caller has already verified (and/or trimmed) the range
// so that the items are within bounds. If this method is used to read out of bounds,
// it will return an error.
func (t *freezerTable) getIndices(from, count uint64) ([]*indexEntry, error) {
	// Apply the table-offset
	from = from - uint64(t.itemOffset)
	// For reading N items, we need N+1 indices.
	buffer := make([]byte, (count+1)*indexEntrySize)
	if _, err := t.index.ReadAt(buffer, int64(from*indexEntrySize)); err != nil {
		return nil, err
	}
	var (
		indices []*indexEntry
		offset  int
	)
	for i := from; i <= from+count; i++ {
		index := new(indexEntry)
		index.unmarshalBinary(buffer[offset:])
		offset += indexEntrySize
		indices = append(indices, index)
	}
	if from == 0 {
		// Special case if we're reading the first item in the freezer. We assume that
		// the first item always starts from zero (regarding deletion, we only support
		// deletion by files, so that assumption holds).
		// This means we can use the first item's metadata to carry information about
		// the 'global' offset, for the deletion-case
		indices[0].offset = 0
		indices[0].filenum = indices[1].filenum
	}
	return indices, nil
}

// Retrieve looks up the data offset of an item with the given number and retrieves
// the raw binary blob from the data file.
func (t *freezerTable) Retrieve(item uint64) ([]byte, error) {
	items, err := t.RetrieveItems(item, 1, 0)
	if err != nil {
		return nil, err
	}
	return items[0], nil
}
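// Illustrative sketch of the N+1 property documented on getIndices: consecutive
// entry pairs delimit each item, and the bounds helper resolves such a pair
// into a concrete file and byte range.
func exampleItemBounds(entries []*indexEntry, i int) (fileId, from, to uint32) {
	start, end := entries[i], entries[i+1]
	from, to, fileId = start.bounds(end)
	return fileId, from, to
}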
// RetrieveItems returns multiple items in sequence, starting from the index 'start'.
// It will return at most 'max' items, but will abort earlier to respect the
// 'maxBytes' argument. However, if 'maxBytes' is smaller than the size of one
// item, it _will_ return one element and possibly overflow the maxBytes.
func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, error) {
	// First we read the 'raw' data, which might be compressed.
	diskData, sizes, err := t.retrieveItems(start, count, maxBytes)
	if err != nil {
		return nil, err
	}
	var (
		output     = make([][]byte, 0, count)
		offset     int // offset for reading
		outputSize int // size of uncompressed data
	)
	// Now slice up the data and decompress.
	for i, diskSize := range sizes {
		item := diskData[offset : offset+diskSize]
		offset += diskSize
		decompressedSize := diskSize
		if !t.noCompression {
			decompressedSize, _ = snappy.DecodedLen(item)
		}
		if i > 0 && uint64(outputSize+decompressedSize) > maxBytes {
			break
		}
		if !t.noCompression {
			data, err := snappy.Decode(nil, item)
			if err != nil {
				return nil, err
			}
			output = append(output, data)
		} else {
			output = append(output, item)
		}
		outputSize += decompressedSize
	}
	return output, nil
}
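// Illustrative usage sketch: batch-read up to 100 items but stop early once
// roughly 1 MiB of data has been collected. At least one item is always
// returned, even if it alone exceeds the byte budget.
func exampleBatchRetrieve(t *freezerTable, start uint64) ([][]byte, error) {
	return t.RetrieveItems(start, 100, 1024*1024)
}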
// retrieveItems reads up to 'count' items from the table. It reads at least
// one item, but otherwise avoids reading more than maxBytes bytes.
// It returns the (potentially compressed) data, and the sizes.
func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []int, error) {
	t.lock.RLock()
	defer t.lock.RUnlock()
	// Ensure the table and the item are accessible
	if t.index == nil || t.head == nil {
		return nil, nil, errClosed
	}
	itemCount := atomic.LoadUint64(&t.items) // max number
	// Ensure the start is written, not deleted from the tail, and that the
	// caller actually wants something
	if itemCount <= start || uint64(t.itemOffset) > start || count == 0 {
		return nil, nil, errOutOfBounds
	}
	if start+count > itemCount {
		count = itemCount - start
	}
	var (
		output     = make([]byte, maxBytes) // Buffer to read data into
		outputSize int                      // Used size of that buffer
	)
	// readData is a helper method to read a single data item from disk.
	readData := func(fileId, start uint32, length int) error {
		// In case a small limit is used, and the elements are large, we may need to
		// realloc the read-buffer when reading the first (and only) item.
		if len(output) < length {
			output = make([]byte, length)
		}
		dataFile, exist := t.files[fileId]
		if !exist {
			return fmt.Errorf("missing data file %d", fileId)
		}
		if _, err := dataFile.ReadAt(output[outputSize:outputSize+length], int64(start)); err != nil {
			return err
		}
		outputSize += length
		return nil
	}
	// Read all the indexes in one go
	indices, err := t.getIndices(start, count)
	if err != nil {
		return nil, nil, err
	}
	var (
		sizes      []int               // The sizes for each element
		totalSize  = 0                 // The total size of all data read so far
		readStart  = indices[0].offset // Where, in the file, to start reading
		unreadSize = 0                 // The size of the as-yet-unread data
	)

	for i, firstIndex := range indices[:len(indices)-1] {
		secondIndex := indices[i+1]
		// Determine the size of the item.
		offset1, offset2, _ := firstIndex.bounds(secondIndex)
		size := int(offset2 - offset1)
		// Crossing a file boundary?
		if secondIndex.filenum != firstIndex.filenum {
			// If we have unread data in the first file, we need to do that read now.
			if unreadSize > 0 {
				if err := readData(firstIndex.filenum, readStart, unreadSize); err != nil {
					return nil, nil, err
				}
				unreadSize = 0
			}
			readStart = 0
		}
		if i > 0 && uint64(totalSize+size) > maxBytes {
			// About to break out due to byte limit being exceeded. We don't
			// read this last item, but we need to do the deferred reads now.
			if unreadSize > 0 {
				if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil {
					return nil, nil, err
				}
			}
			break
		}
		// Defer the read for later
		unreadSize += size
		totalSize += size
		sizes = append(sizes, size)
		if i == len(indices)-2 || uint64(totalSize) > maxBytes {
			// Last item, need to do the read now
			if err := readData(secondIndex.filenum, readStart, unreadSize); err != nil {
				return nil, nil, err
			}
			break
		}
	}
	return output[:outputSize], sizes, nil
}

// has reports whether the item with the given number exists in the freezer table.
func (t *freezerTable) has(number uint64) bool {
	return atomic.LoadUint64(&t.items) > number
}

// size returns the total data size in the freezer table.
func (t *freezerTable) size() (uint64, error) {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.sizeNolock()
}

// sizeNolock returns the total data size in the freezer table without obtaining
// the mutex first.
func (t *freezerTable) sizeNolock() (uint64, error) {
	stat, err := t.index.Stat()
	if err != nil {
		return 0, err
	}
	total := uint64(t.maxFileSize)*uint64(t.headId-t.tailId) + uint64(t.headBytes) + uint64(stat.Size())
	return total, nil
}

// Sync pushes any pending data from memory out to disk. This is an expensive
// operation, so use it with care.
func (t *freezerTable) Sync() error {
	if err := t.index.Sync(); err != nil {
		return err
	}
	return t.head.Sync()
}

// DumpIndex is a debug print utility function, mainly for testing. It can also
// be used to analyse a live freezer table index.
func (t *freezerTable) DumpIndex(start, stop int64) {
	buf := make([]byte, indexEntrySize)

	fmt.Printf("| number | fileno | offset |\n")
	fmt.Printf("|--------|--------|--------|\n")

	for i := uint64(start); ; i++ {
		if _, err := t.index.ReadAt(buf, int64(i*indexEntrySize)); err != nil {
			break
		}
		var entry indexEntry
		entry.unmarshalBinary(buf)
		fmt.Printf("| %03d | %03d | %03d | \n", i, entry.filenum, entry.offset)
		if stop > 0 && i >= uint64(stop) {
			break
		}
	}
	fmt.Printf("|--------------------------|\n")
}
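// Illustrative arithmetic mirroring sizeNolock: the table size is estimated as
// all sealed data files at maxFileSize, plus the partially filled head file,
// plus the index file itself.
func exampleTableSize(maxFileSize, headId, tailId, headBytes uint32, indexSize int64) uint64 {
	return uint64(maxFileSize)*uint64(headId-tailId) + uint64(headBytes) + uint64(indexSize)
}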