github.com/core-coin/go-core/v2@v2.1.9/core/rawdb/freezer_table.go (about) 1 // Copyright 2019 by the Authors 2 // This file is part of the go-core library. 3 // 4 // The go-core library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-core library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-core library. If not, see <http://www.gnu.org/licenses/>. 16 17 package rawdb 18 19 import ( 20 "encoding/binary" 21 "errors" 22 "fmt" 23 "io" 24 "os" 25 "path/filepath" 26 "sync" 27 "sync/atomic" 28 29 "github.com/golang/snappy" 30 31 "github.com/core-coin/go-core/v2/common" 32 "github.com/core-coin/go-core/v2/log" 33 "github.com/core-coin/go-core/v2/metrics" 34 ) 35 36 var ( 37 // errClosed is returned if an operation attempts to read from or write to the 38 // freezer table after it has already been closed. 39 errClosed = errors.New("closed") 40 41 // errOutOfBounds is returned if the item requested is not contained within the 42 // freezer table. 43 errOutOfBounds = errors.New("out of bounds") 44 45 // errNotSupported is returned if the database doesn't support the required operation. 46 errNotSupported = errors.New("this operation is not supported") 47 ) 48 49 // indexEntry contains the number/id of the file that the data resides in, as well as the 50 // offset within the file to the end of the data 51 // In serialized form, the filenum is stored as uint16.
52 type indexEntry struct { 53 filenum uint32 // stored as uint16 ( 2 bytes) 54 offset uint32 // stored as uint32 ( 4 bytes) 55 } 56 57 const indexEntrySize = 6 58 59 // unmarshallBinary deserializes binary b into the rawIndex entry. 60 func (i *indexEntry) unmarshalBinary(b []byte) error { 61 i.filenum = uint32(binary.BigEndian.Uint16(b[:2])) 62 i.offset = binary.BigEndian.Uint32(b[2:6]) 63 return nil 64 } 65 66 // marshallBinary serializes the rawIndex entry into binary. 67 func (i *indexEntry) marshallBinary() []byte { 68 b := make([]byte, indexEntrySize) 69 binary.BigEndian.PutUint16(b[:2], uint16(i.filenum)) 70 binary.BigEndian.PutUint32(b[2:6], i.offset) 71 return b 72 } 73 74 // freezerTable represents a single chained data table within the freezer (e.g. blocks). 75 // It consists of a data file (snappy encoded arbitrary data blobs) and an indexEntry 76 // file (uncompressed 64 bit indices into the data file). 77 type freezerTable struct { 78 // WARNING: The `items` field is accessed atomically. On 32 bit platforms, only 79 // 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned, 80 // so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG). 81 items uint64 // Number of items stored in the table (including items removed from tail) 82 83 noCompression bool // if true, disables snappy compression. Note: does not work retroactively 84 maxFileSize uint32 // Max file size for data-files 85 name string 86 path string 87 88 head *os.File // File descriptor for the data head of the table 89 files map[uint32]*os.File // open files 90 headId uint32 // number of the currently active head file 91 tailId uint32 // number of the earliest file 92 index *os.File // File descriptor for the indexEntry file of the table 93 94 // In the case that old items are deleted (from the tail), we use itemOffset 95 // to count how many historic items have gone missing. 
96 itemOffset uint32 // Offset (number of discarded items) 97 98 headBytes uint32 // Number of bytes written to the head file 99 readMeter metrics.Meter // Meter for measuring the effective amount of data read 100 writeMeter metrics.Meter // Meter for measuring the effective amount of data written 101 sizeGauge metrics.Gauge // Gauge for tracking the combined size of all freezer tables 102 103 logger log.Logger // Logger with database path and table name ambedded 104 lock sync.RWMutex // Mutex protecting the data file descriptors 105 } 106 107 // newTable opens a freezer table with default settings - 2G files 108 func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, disableSnappy bool) (*freezerTable, error) { 109 return newCustomTable(path, name, readMeter, writeMeter, sizeGauge, 2*1000*1000*1000, disableSnappy) 110 } 111 112 // openFreezerFileForAppend opens a freezer table file and seeks to the end 113 func openFreezerFileForAppend(filename string) (*os.File, error) { 114 // Open the file without the O_APPEND flag 115 // because it has differing behaviour during Truncate operations 116 // on different OS's 117 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644) 118 if err != nil { 119 return nil, err 120 } 121 // Seek to end for append 122 if _, err = file.Seek(0, io.SeekEnd); err != nil { 123 return nil, err 124 } 125 return file, nil 126 } 127 128 // openFreezerFileForReadOnly opens a freezer table file for read only access 129 func openFreezerFileForReadOnly(filename string) (*os.File, error) { 130 return os.OpenFile(filename, os.O_RDONLY, 0644) 131 } 132 133 // openFreezerFileTruncated opens a freezer table making sure it is truncated 134 func openFreezerFileTruncated(filename string) (*os.File, error) { 135 return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) 136 } 137 138 // truncateFreezerFile resizes a freezer table file and seeks to the end 139 func 
truncateFreezerFile(file *os.File, size int64) error { 140 if err := file.Truncate(size); err != nil { 141 return err 142 } 143 // Seek to end for append 144 if _, err := file.Seek(0, io.SeekEnd); err != nil { 145 return err 146 } 147 return nil 148 } 149 150 // newCustomTable opens a freezer table, creating the data and index files if they are 151 // non existent. Both files are truncated to the shortest common length to ensure 152 // they don't go out of sync. 153 func newCustomTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression bool) (*freezerTable, error) { 154 // Ensure the containing directory exists and open the indexEntry file 155 if err := os.MkdirAll(path, 0755); err != nil { 156 return nil, err 157 } 158 var idxName string 159 if noCompression { 160 // Raw idx 161 idxName = fmt.Sprintf("%s.ridx", name) 162 } else { 163 // Compressed idx 164 idxName = fmt.Sprintf("%s.cidx", name) 165 } 166 offsets, err := openFreezerFileForAppend(filepath.Join(path, idxName)) 167 if err != nil { 168 return nil, err 169 } 170 // Create the table and repair any past inconsistency 171 tab := &freezerTable{ 172 index: offsets, 173 files: make(map[uint32]*os.File), 174 readMeter: readMeter, 175 writeMeter: writeMeter, 176 sizeGauge: sizeGauge, 177 name: name, 178 path: path, 179 logger: log.New("database", path, "table", name), 180 noCompression: noCompression, 181 maxFileSize: maxFilesize, 182 } 183 if err := tab.repair(); err != nil { 184 tab.Close() 185 return nil, err 186 } 187 // Initialize the starting size counter 188 size, err := tab.sizeNolock() 189 if err != nil { 190 tab.Close() 191 return nil, err 192 } 193 tab.sizeGauge.Inc(int64(size)) 194 195 return tab, nil 196 } 197 198 // repair cross checks the head and the index file and truncates them to 199 // be in sync with each other after a potential crash / data loss. 
200 func (t *freezerTable) repair() error { 201 // Create a temporary offset buffer to init files with and read indexEntry into 202 buffer := make([]byte, indexEntrySize) 203 204 // If we've just created the files, initialize the index with the 0 indexEntry 205 stat, err := t.index.Stat() 206 if err != nil { 207 return err 208 } 209 if stat.Size() == 0 { 210 if _, err := t.index.Write(buffer); err != nil { 211 return err 212 } 213 } 214 // Ensure the index is a multiple of indexEntrySize bytes 215 if overflow := stat.Size() % indexEntrySize; overflow != 0 { 216 truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path 217 } 218 // Retrieve the file sizes and prepare for truncation 219 if stat, err = t.index.Stat(); err != nil { 220 return err 221 } 222 offsetsSize := stat.Size() 223 224 // Open the head file 225 var ( 226 firstIndex indexEntry 227 lastIndex indexEntry 228 contentSize int64 229 contentExp int64 230 ) 231 // Read index zero, determine what file is the earliest 232 // and what item offset to use 233 t.index.ReadAt(buffer, 0) 234 firstIndex.unmarshalBinary(buffer) 235 236 t.tailId = firstIndex.filenum 237 t.itemOffset = firstIndex.offset 238 239 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 240 lastIndex.unmarshalBinary(buffer) 241 t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend) 242 if err != nil { 243 return err 244 } 245 if stat, err = t.head.Stat(); err != nil { 246 return err 247 } 248 contentSize = stat.Size() 249 250 // Keep truncating both files until they come in sync 251 contentExp = int64(lastIndex.offset) 252 253 for contentExp != contentSize { 254 // Truncate the head file to the last offset pointer 255 if contentExp < contentSize { 256 t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) 257 if err := truncateFreezerFile(t.head, contentExp); err != nil { 258 return err 259 } 260 contentSize = contentExp 261 } 
262 // Truncate the index to point within the head file 263 if contentExp > contentSize { 264 t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) 265 if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil { 266 return err 267 } 268 offsetsSize -= indexEntrySize 269 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 270 var newLastIndex indexEntry 271 newLastIndex.unmarshalBinary(buffer) 272 // We might have slipped back into an earlier head-file here 273 if newLastIndex.filenum != lastIndex.filenum { 274 // Release earlier opened file 275 t.releaseFile(lastIndex.filenum) 276 if t.head, err = t.openFile(newLastIndex.filenum, openFreezerFileForAppend); err != nil { 277 return err 278 } 279 if stat, err = t.head.Stat(); err != nil { 280 // TODO, anything more we can do here? 281 // A data file has gone missing... 282 return err 283 } 284 contentSize = stat.Size() 285 } 286 lastIndex = newLastIndex 287 contentExp = int64(lastIndex.offset) 288 } 289 } 290 // Ensure all reparation changes have been written to disk 291 if err := t.index.Sync(); err != nil { 292 return err 293 } 294 if err := t.head.Sync(); err != nil { 295 return err 296 } 297 // Update the item and byte counters and return 298 t.items = uint64(t.itemOffset) + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file 299 t.headBytes = uint32(contentSize) 300 t.headId = lastIndex.filenum 301 302 // Close opened files and preopen all files 303 if err := t.preopen(); err != nil { 304 return err 305 } 306 t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes)) 307 return nil 308 } 309 310 // preopen opens all files that the freezer will need. 
This method should be called from an init-context, 311 // since it assumes that it doesn't have to bother with locking 312 // The rationale for doing preopen is to not have to do it from within Retrieve, thus not needing to ever 313 // obtain a write-lock within Retrieve. 314 func (t *freezerTable) preopen() (err error) { 315 // The repair might have already opened (some) files 316 t.releaseFilesAfter(0, false) 317 // Open all except head in RDONLY 318 for i := t.tailId; i < t.headId; i++ { 319 if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil { 320 return err 321 } 322 } 323 // Open head in read/write 324 t.head, err = t.openFile(t.headId, openFreezerFileForAppend) 325 return err 326 } 327 328 // truncate discards any recent data above the provided threshold number. 329 func (t *freezerTable) truncate(items uint64) error { 330 t.lock.Lock() 331 defer t.lock.Unlock() 332 333 // If our item count is correct, don't do anything 334 existing := atomic.LoadUint64(&t.items) 335 if existing <= items { 336 return nil 337 } 338 // We need to truncate, save the old size for metrics tracking 339 oldSize, err := t.sizeNolock() 340 if err != nil { 341 return err 342 } 343 // Something's out of sync, truncate the table's offset index 344 log := t.logger.Debug 345 if existing > items+1 { 346 log = t.logger.Warn // Only loud warn if we delete multiple items 347 } 348 log("Truncating freezer table", "items", existing, "limit", items) 349 if err := truncateFreezerFile(t.index, int64(items+1)*indexEntrySize); err != nil { 350 return err 351 } 352 // Calculate the new expected size of the data file and truncate it 353 buffer := make([]byte, indexEntrySize) 354 if _, err := t.index.ReadAt(buffer, int64(items*indexEntrySize)); err != nil { 355 return err 356 } 357 var expected indexEntry 358 expected.unmarshalBinary(buffer) 359 360 // We might need to truncate back to older files 361 if expected.filenum != t.headId { 362 // If already open for reading, force-reopen for 
writing 363 t.releaseFile(expected.filenum) 364 newHead, err := t.openFile(expected.filenum, openFreezerFileForAppend) 365 if err != nil { 366 return err 367 } 368 // Release any files _after the current head -- both the previous head 369 // and any files which may have been opened for reading 370 t.releaseFilesAfter(expected.filenum, true) 371 // Set back the historic head 372 t.head = newHead 373 atomic.StoreUint32(&t.headId, expected.filenum) 374 } 375 if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil { 376 return err 377 } 378 // All data files truncated, set internal counters and return 379 atomic.StoreUint64(&t.items, items) 380 atomic.StoreUint32(&t.headBytes, expected.offset) 381 382 // Retrieve the new size and update the total size counter 383 newSize, err := t.sizeNolock() 384 if err != nil { 385 return err 386 } 387 t.sizeGauge.Dec(int64(oldSize - newSize)) 388 389 return nil 390 } 391 392 // Close closes all opened files. 393 func (t *freezerTable) Close() error { 394 t.lock.Lock() 395 defer t.lock.Unlock() 396 397 var errs []error 398 if err := t.index.Close(); err != nil { 399 errs = append(errs, err) 400 } 401 t.index = nil 402 403 for _, f := range t.files { 404 if err := f.Close(); err != nil { 405 errs = append(errs, err) 406 } 407 } 408 t.head = nil 409 410 if errs != nil { 411 return fmt.Errorf("%v", errs) 412 } 413 return nil 414 } 415 416 // openFile assumes that the write-lock is held by the caller 417 func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) { 418 var exist bool 419 if f, exist = t.files[num]; !exist { 420 var name string 421 if t.noCompression { 422 name = fmt.Sprintf("%s.%04d.rdat", t.name, num) 423 } else { 424 name = fmt.Sprintf("%s.%04d.cdat", t.name, num) 425 } 426 f, err = opener(filepath.Join(t.path, name)) 427 if err != nil { 428 return nil, err 429 } 430 t.files[num] = f 431 } 432 return f, err 433 } 434 435 // releaseFile closes a file, and 
removes it from the open file cache. 436 // Assumes that the caller holds the write lock 437 func (t *freezerTable) releaseFile(num uint32) { 438 if f, exist := t.files[num]; exist { 439 delete(t.files, num) 440 f.Close() 441 } 442 } 443 444 // releaseFilesAfter closes all open files with a higher number, and optionally also deletes the files 445 func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) { 446 for fnum, f := range t.files { 447 if fnum > num { 448 delete(t.files, fnum) 449 f.Close() 450 if remove { 451 os.Remove(f.Name()) 452 } 453 } 454 } 455 } 456 457 // Append injects a binary blob at the end of the freezer table. The item number 458 // is a precautionary parameter to ensure data correctness, but the table will 459 // reject already existing data. 460 // 461 // Note, this method will *not* flush any data to disk so be sure to explicitly 462 // fsync before irreversibly deleting data from the database. 463 func (t *freezerTable) Append(item uint64, blob []byte) error { 464 // Read lock prevents competition with truncate 465 t.lock.RLock() 466 // Ensure the table is still accessible 467 if t.index == nil || t.head == nil { 468 t.lock.RUnlock() 469 return errClosed 470 } 471 // Ensure only the next item can be written, nothing else 472 if atomic.LoadUint64(&t.items) != item { 473 t.lock.RUnlock() 474 return fmt.Errorf("appending unexpected item: want %d, have %d", t.items, item) 475 } 476 // Encode the blob and write it into the data file 477 if !t.noCompression { 478 blob = snappy.Encode(nil, blob) 479 } 480 bLen := uint32(len(blob)) 481 if t.headBytes+bLen < bLen || 482 t.headBytes+bLen > t.maxFileSize { 483 // we need a new file, writing would overflow 484 t.lock.RUnlock() 485 t.lock.Lock() 486 nextID := atomic.LoadUint32(&t.headId) + 1 487 // We open the next file in truncated mode -- if this file already 488 // exists, we need to start over from scratch on it 489 newHead, err := t.openFile(nextID, openFreezerFileTruncated) 490 if err != 
nil { 491 t.lock.Unlock() 492 return err 493 } 494 // Close old file, and reopen in RDONLY mode 495 t.releaseFile(t.headId) 496 t.openFile(t.headId, openFreezerFileForReadOnly) 497 498 // Swap out the current head 499 t.head = newHead 500 atomic.StoreUint32(&t.headBytes, 0) 501 atomic.StoreUint32(&t.headId, nextID) 502 t.lock.Unlock() 503 t.lock.RLock() 504 } 505 506 defer t.lock.RUnlock() 507 if _, err := t.head.Write(blob); err != nil { 508 return err 509 } 510 newOffset := atomic.AddUint32(&t.headBytes, bLen) 511 idx := indexEntry{ 512 filenum: atomic.LoadUint32(&t.headId), 513 offset: newOffset, 514 } 515 // Write indexEntry 516 t.index.Write(idx.marshallBinary()) 517 518 t.writeMeter.Mark(int64(bLen + indexEntrySize)) 519 t.sizeGauge.Inc(int64(bLen + indexEntrySize)) 520 521 atomic.AddUint64(&t.items, 1) 522 return nil 523 } 524 525 // getBounds returns the indexes for the item 526 // returns start, end, filenumber and error 527 func (t *freezerTable) getBounds(item uint64) (uint32, uint32, uint32, error) { 528 buffer := make([]byte, indexEntrySize) 529 var startIdx, endIdx indexEntry 530 // Read second index 531 if _, err := t.index.ReadAt(buffer, int64((item+1)*indexEntrySize)); err != nil { 532 return 0, 0, 0, err 533 } 534 endIdx.unmarshalBinary(buffer) 535 // Read first index (unless it's the very first item) 536 if item != 0 { 537 if _, err := t.index.ReadAt(buffer, int64(item*indexEntrySize)); err != nil { 538 return 0, 0, 0, err 539 } 540 startIdx.unmarshalBinary(buffer) 541 } else { 542 // Special case if we're reading the first item in the freezer. We assume that 543 // the first item always start from zero(regarding the deletion, we 544 // only support deletion by files, so that the assumption is held). 
545 // This means we can use the first item metadata to carry information about 546 // the 'global' offset, for the deletion-case 547 return 0, endIdx.offset, endIdx.filenum, nil 548 } 549 if startIdx.filenum != endIdx.filenum { 550 // If a piece of data 'crosses' a data-file, 551 // it's actually in one piece on the second data-file. 552 // We return a zero-indexEntry for the second file as start 553 return 0, endIdx.offset, endIdx.filenum, nil 554 } 555 return startIdx.offset, endIdx.offset, endIdx.filenum, nil 556 } 557 558 // Retrieve looks up the data offset of an item with the given number and retrieves 559 // the raw binary blob from the data file. 560 func (t *freezerTable) Retrieve(item uint64) ([]byte, error) { 561 t.lock.RLock() 562 // Ensure the table and the item is accessible 563 if t.index == nil || t.head == nil { 564 t.lock.RUnlock() 565 return nil, errClosed 566 } 567 if atomic.LoadUint64(&t.items) <= item { 568 t.lock.RUnlock() 569 return nil, errOutOfBounds 570 } 571 // Ensure the item was not deleted from the tail either 572 if uint64(t.itemOffset) > item { 573 t.lock.RUnlock() 574 return nil, errOutOfBounds 575 } 576 startOffset, endOffset, filenum, err := t.getBounds(item - uint64(t.itemOffset)) 577 if err != nil { 578 t.lock.RUnlock() 579 return nil, err 580 } 581 dataFile, exist := t.files[filenum] 582 if !exist { 583 t.lock.RUnlock() 584 return nil, fmt.Errorf("missing data file %d", filenum) 585 } 586 // Retrieve the data itself, decompress and return 587 blob := make([]byte, endOffset-startOffset) 588 if _, err := dataFile.ReadAt(blob, int64(startOffset)); err != nil { 589 t.lock.RUnlock() 590 return nil, err 591 } 592 t.lock.RUnlock() 593 t.readMeter.Mark(int64(len(blob) + 2*indexEntrySize)) 594 595 if t.noCompression { 596 return blob, nil 597 } 598 return snappy.Decode(nil, blob) 599 } 600 601 // has returns an indicator whether the specified number data 602 // exists in the freezer table. 
603 func (t *freezerTable) has(number uint64) bool { 604 return atomic.LoadUint64(&t.items) > number 605 } 606 607 // size returns the total data size in the freezer table. 608 func (t *freezerTable) size() (uint64, error) { 609 t.lock.RLock() 610 defer t.lock.RUnlock() 611 612 return t.sizeNolock() 613 } 614 615 // sizeNolock returns the total data size in the freezer table without obtaining 616 // the mutex first. 617 func (t *freezerTable) sizeNolock() (uint64, error) { 618 stat, err := t.index.Stat() 619 if err != nil { 620 return 0, err 621 } 622 total := uint64(t.maxFileSize)*uint64(t.headId-t.tailId) + uint64(t.headBytes) + uint64(stat.Size()) 623 return total, nil 624 } 625 626 // Sync pushes any pending data from memory out to disk. This is an expensive 627 // operation, so use it with care. 628 func (t *freezerTable) Sync() error { 629 if err := t.index.Sync(); err != nil { 630 return err 631 } 632 return t.head.Sync() 633 } 634 635 // printIndex is a debug print utility function for testing 636 func (t *freezerTable) printIndex() { 637 buf := make([]byte, indexEntrySize) 638 639 fmt.Printf("|-----------------|\n") 640 fmt.Printf("| fileno | offset |\n") 641 fmt.Printf("|--------+--------|\n") 642 643 for i := uint64(0); ; i++ { 644 if _, err := t.index.ReadAt(buf, int64(i*indexEntrySize)); err != nil { 645 break 646 } 647 var entry indexEntry 648 entry.unmarshalBinary(buf) 649 fmt.Printf("| %03d | %03d | \n", entry.filenum, entry.offset) 650 if i > 100 { 651 fmt.Printf(" ... \n") 652 break 653 } 654 } 655 fmt.Printf("|-----------------|\n") 656 }