github.com/ccm-chain/ccmchain@v1.0.0/core/rawdb/freezer_table.go (about) 1 // Copyright 2019 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package rawdb 18 19 import ( 20 "encoding/binary" 21 "errors" 22 "fmt" 23 "io" 24 "os" 25 "path/filepath" 26 "sync" 27 "sync/atomic" 28 29 "github.com/ccm-chain/ccmchain/common" 30 "github.com/ccm-chain/ccmchain/log" 31 "github.com/ccm-chain/ccmchain/metrics" 32 "github.com/golang/snappy" 33 ) 34 35 var ( 36 // errClosed is returned if an operation attempts to read from or write to the 37 // freezer table after it has already been closed. 38 errClosed = errors.New("closed") 39 40 // errOutOfBounds is returned if the item requested is not contained within the 41 // freezer table. 42 errOutOfBounds = errors.New("out of bounds") 43 44 // errNotSupported is returned if the database doesn't support the required operation. 45 errNotSupported = errors.New("this operation is not supported") 46 ) 47 48 // indexEntry contains the number/id of the file that the data resides in, aswell as the 49 // offset within the file to the end of the data 50 // In serialized form, the filenum is stored as uint16. 51 type indexEntry struct { 52 filenum uint32 // stored as uint16 ( 2 bytes) 53 offset uint32 // stored as uint32 ( 4 bytes) 54 } 55 56 const indexEntrySize = 6 57 58 // unmarshallBinary deserializes binary b into the rawIndex entry. 59 func (i *indexEntry) unmarshalBinary(b []byte) error { 60 i.filenum = uint32(binary.BigEndian.Uint16(b[:2])) 61 i.offset = binary.BigEndian.Uint32(b[2:6]) 62 return nil 63 } 64 65 // marshallBinary serializes the rawIndex entry into binary. 66 func (i *indexEntry) marshallBinary() []byte { 67 b := make([]byte, indexEntrySize) 68 binary.BigEndian.PutUint16(b[:2], uint16(i.filenum)) 69 binary.BigEndian.PutUint32(b[2:6], i.offset) 70 return b 71 } 72 73 // freezerTable represents a single chained data table within the freezer (e.g. blocks). 74 // It consists of a data file (snappy encoded arbitrary data blobs) and an indexEntry 75 // file (uncompressed 64 bit indices into the data file). 76 type freezerTable struct { 77 // WARNING: The `items` field is accessed atomically. On 32 bit platforms, only 78 // 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned, 79 // so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG). 80 items uint64 // Number of items stored in the table (including items removed from tail) 81 82 noCompression bool // if true, disables snappy compression. Note: does not work retroactively 83 maxFileSize uint32 // Max file size for data-files 84 name string 85 path string 86 87 head *os.File // File descriptor for the data head of the table 88 files map[uint32]*os.File // open files 89 headId uint32 // number of the currently active head file 90 tailId uint32 // number of the earliest file 91 index *os.File // File descriptor for the indexEntry file of the table 92 93 // In the case that old items are deleted (from the tail), we use itemOffset 94 // to count how many historic items have gone missing. 95 itemOffset uint32 // Offset (number of discarded items) 96 97 headBytes uint32 // Number of bytes written to the head file 98 readMeter metrics.Meter // Meter for measuring the effective amount of data read 99 writeMeter metrics.Meter // Meter for measuring the effective amount of data written 100 sizeGauge metrics.Gauge // Gauge for tracking the combined size of all freezer tables 101 102 logger log.Logger // Logger with database path and table name ambedded 103 lock sync.RWMutex // Mutex protecting the data file descriptors 104 } 105 106 // newTable opens a freezer table with default settings - 2G files 107 func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, disableSnappy bool) (*freezerTable, error) { 108 return newCustomTable(path, name, readMeter, writeMeter, sizeGauge, 2*1000*1000*1000, disableSnappy) 109 } 110 111 // openFreezerFileForAppend opens a freezer table file and seeks to the end 112 func openFreezerFileForAppend(filename string) (*os.File, error) { 113 // Open the file without the O_APPEND flag 114 // because it has differing behaviour during Truncate operations 115 // on different OS's 116 file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644) 117 if err != nil { 118 return nil, err 119 } 120 // Seek to end for append 121 if _, err = file.Seek(0, io.SeekEnd); err != nil { 122 return nil, err 123 } 124 return file, nil 125 } 126 127 // openFreezerFileForReadOnly opens a freezer table file for read only access 128 func openFreezerFileForReadOnly(filename string) (*os.File, error) { 129 return os.OpenFile(filename, os.O_RDONLY, 0644) 130 } 131 132 // openFreezerFileTruncated opens a freezer table making sure it is truncated 133 func openFreezerFileTruncated(filename string) (*os.File, error) { 134 return os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) 135 } 136 137 // truncateFreezerFile resizes a freezer table file and seeks to the end 138 func truncateFreezerFile(file *os.File, size int64) error { 139 if err := file.Truncate(size); err != nil { 140 return err 141 } 142 // Seek to end for append 143 if _, err := file.Seek(0, io.SeekEnd); err != nil { 144 return err 145 } 146 return nil 147 } 148 149 // newCustomTable opens a freezer table, creating the data and index files if they are 150 // non existent. Both files are truncated to the shortest common length to ensure 151 // they don't go out of sync. 152 func newCustomTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression bool) (*freezerTable, error) { 153 // Ensure the containing directory exists and open the indexEntry file 154 if err := os.MkdirAll(path, 0755); err != nil { 155 return nil, err 156 } 157 var idxName string 158 if noCompression { 159 // Raw idx 160 idxName = fmt.Sprintf("%s.ridx", name) 161 } else { 162 // Compressed idx 163 idxName = fmt.Sprintf("%s.cidx", name) 164 } 165 offsets, err := openFreezerFileForAppend(filepath.Join(path, idxName)) 166 if err != nil { 167 return nil, err 168 } 169 // Create the table and repair any past inconsistency 170 tab := &freezerTable{ 171 index: offsets, 172 files: make(map[uint32]*os.File), 173 readMeter: readMeter, 174 writeMeter: writeMeter, 175 sizeGauge: sizeGauge, 176 name: name, 177 path: path, 178 logger: log.New("database", path, "table", name), 179 noCompression: noCompression, 180 maxFileSize: maxFilesize, 181 } 182 if err := tab.repair(); err != nil { 183 tab.Close() 184 return nil, err 185 } 186 // Initialize the starting size counter 187 size, err := tab.sizeNolock() 188 if err != nil { 189 tab.Close() 190 return nil, err 191 } 192 tab.sizeGauge.Inc(int64(size)) 193 194 return tab, nil 195 } 196 197 // repair cross checks the head and the index file and truncates them to 198 // be in sync with each other after a potential crash / data loss. 199 func (t *freezerTable) repair() error { 200 // Create a temporary offset buffer to init files with and read indexEntry into 201 buffer := make([]byte, indexEntrySize) 202 203 // If we've just created the files, initialize the index with the 0 indexEntry 204 stat, err := t.index.Stat() 205 if err != nil { 206 return err 207 } 208 if stat.Size() == 0 { 209 if _, err := t.index.Write(buffer); err != nil { 210 return err 211 } 212 } 213 // Ensure the index is a multiple of indexEntrySize bytes 214 if overflow := stat.Size() % indexEntrySize; overflow != 0 { 215 truncateFreezerFile(t.index, stat.Size()-overflow) // New file can't trigger this path 216 } 217 // Retrieve the file sizes and prepare for truncation 218 if stat, err = t.index.Stat(); err != nil { 219 return err 220 } 221 offsetsSize := stat.Size() 222 223 // Open the head file 224 var ( 225 firstIndex indexEntry 226 lastIndex indexEntry 227 contentSize int64 228 contentExp int64 229 ) 230 // Read index zero, determine what file is the earliest 231 // and what item offset to use 232 t.index.ReadAt(buffer, 0) 233 firstIndex.unmarshalBinary(buffer) 234 235 t.tailId = firstIndex.filenum 236 t.itemOffset = firstIndex.offset 237 238 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 239 lastIndex.unmarshalBinary(buffer) 240 t.head, err = t.openFile(lastIndex.filenum, openFreezerFileForAppend) 241 if err != nil { 242 return err 243 } 244 if stat, err = t.head.Stat(); err != nil { 245 return err 246 } 247 contentSize = stat.Size() 248 249 // Keep truncating both files until they come in sync 250 contentExp = int64(lastIndex.offset) 251 252 for contentExp != contentSize { 253 // Truncate the head file to the last offset pointer 254 if contentExp < contentSize { 255 t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) 256 if err := truncateFreezerFile(t.head, contentExp); err != nil { 257 return err 258 } 259 contentSize = contentExp 260 } 261 // Truncate the index to point within the head file 262 if contentExp > contentSize { 263 t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) 264 if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil { 265 return err 266 } 267 offsetsSize -= indexEntrySize 268 t.index.ReadAt(buffer, offsetsSize-indexEntrySize) 269 var newLastIndex indexEntry 270 newLastIndex.unmarshalBinary(buffer) 271 // We might have slipped back into an earlier head-file here 272 if newLastIndex.filenum != lastIndex.filenum { 273 // Release earlier opened file 274 t.releaseFile(lastIndex.filenum) 275 if t.head, err = t.openFile(newLastIndex.filenum, openFreezerFileForAppend); err != nil { 276 return err 277 } 278 if stat, err = t.head.Stat(); err != nil { 279 // TODO, anything more we can do here? 280 // A data file has gone missing... 281 return err 282 } 283 contentSize = stat.Size() 284 } 285 lastIndex = newLastIndex 286 contentExp = int64(lastIndex.offset) 287 } 288 } 289 // Ensure all reparation changes have been written to disk 290 if err := t.index.Sync(); err != nil { 291 return err 292 } 293 if err := t.head.Sync(); err != nil { 294 return err 295 } 296 // Update the item and byte counters and return 297 t.items = uint64(t.itemOffset) + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file 298 t.headBytes = uint32(contentSize) 299 t.headId = lastIndex.filenum 300 301 // Close opened files and preopen all files 302 if err := t.preopen(); err != nil { 303 return err 304 } 305 t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes)) 306 return nil 307 } 308 309 // preopen opens all files that the freezer will need. This method should be called from an init-context, 310 // since it assumes that it doesn't have to bother with locking 311 // The rationale for doing preopen is to not have to do it from within Retrieve, thus not needing to ever 312 // obtain a write-lock within Retrieve. 313 func (t *freezerTable) preopen() (err error) { 314 // The repair might have already opened (some) files 315 t.releaseFilesAfter(0, false) 316 // Open all except head in RDONLY 317 for i := t.tailId; i < t.headId; i++ { 318 if _, err = t.openFile(i, openFreezerFileForReadOnly); err != nil { 319 return err 320 } 321 } 322 // Open head in read/write 323 t.head, err = t.openFile(t.headId, openFreezerFileForAppend) 324 return err 325 } 326 327 // truncate discards any recent data above the provided threshold number. 328 func (t *freezerTable) truncate(items uint64) error { 329 t.lock.Lock() 330 defer t.lock.Unlock() 331 332 // If our item count is correct, don't do anything 333 existing := atomic.LoadUint64(&t.items) 334 if existing <= items { 335 return nil 336 } 337 // We need to truncate, save the old size for metrics tracking 338 oldSize, err := t.sizeNolock() 339 if err != nil { 340 return err 341 } 342 // Something's out of sync, truncate the table's offset index 343 log := t.logger.Debug 344 if existing > items+1 { 345 log = t.logger.Warn // Only loud warn if we delete multiple items 346 } 347 log("Truncating freezer table", "items", existing, "limit", items) 348 if err := truncateFreezerFile(t.index, int64(items+1)*indexEntrySize); err != nil { 349 return err 350 } 351 // Calculate the new expected size of the data file and truncate it 352 buffer := make([]byte, indexEntrySize) 353 if _, err := t.index.ReadAt(buffer, int64(items*indexEntrySize)); err != nil { 354 return err 355 } 356 var expected indexEntry 357 expected.unmarshalBinary(buffer) 358 359 // We might need to truncate back to older files 360 if expected.filenum != t.headId { 361 // If already open for reading, force-reopen for writing 362 t.releaseFile(expected.filenum) 363 newHead, err := t.openFile(expected.filenum, openFreezerFileForAppend) 364 if err != nil { 365 return err 366 } 367 // Release any files _after the current head -- both the previous head 368 // and any files which may have been opened for reading 369 t.releaseFilesAfter(expected.filenum, true) 370 // Set back the historic head 371 t.head = newHead 372 atomic.StoreUint32(&t.headId, expected.filenum) 373 } 374 if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil { 375 return err 376 } 377 // All data files truncated, set internal counters and return 378 atomic.StoreUint64(&t.items, items) 379 atomic.StoreUint32(&t.headBytes, expected.offset) 380 381 // Retrieve the new size and update the total size counter 382 newSize, err := t.sizeNolock() 383 if err != nil { 384 return err 385 } 386 t.sizeGauge.Dec(int64(oldSize - newSize)) 387 388 return nil 389 } 390 391 // Close closes all opened files. 392 func (t *freezerTable) Close() error { 393 t.lock.Lock() 394 defer t.lock.Unlock() 395 396 var errs []error 397 if err := t.index.Close(); err != nil { 398 errs = append(errs, err) 399 } 400 t.index = nil 401 402 for _, f := range t.files { 403 if err := f.Close(); err != nil { 404 errs = append(errs, err) 405 } 406 } 407 t.head = nil 408 409 if errs != nil { 410 return fmt.Errorf("%v", errs) 411 } 412 return nil 413 } 414 415 // openFile assumes that the write-lock is held by the caller 416 func (t *freezerTable) openFile(num uint32, opener func(string) (*os.File, error)) (f *os.File, err error) { 417 var exist bool 418 if f, exist = t.files[num]; !exist { 419 var name string 420 if t.noCompression { 421 name = fmt.Sprintf("%s.%04d.rdat", t.name, num) 422 } else { 423 name = fmt.Sprintf("%s.%04d.cdat", t.name, num) 424 } 425 f, err = opener(filepath.Join(t.path, name)) 426 if err != nil { 427 return nil, err 428 } 429 t.files[num] = f 430 } 431 return f, err 432 } 433 434 // releaseFile closes a file, and removes it from the open file cache. 435 // Assumes that the caller holds the write lock 436 func (t *freezerTable) releaseFile(num uint32) { 437 if f, exist := t.files[num]; exist { 438 delete(t.files, num) 439 f.Close() 440 } 441 } 442 443 // releaseFilesAfter closes all open files with a higher number, and optionally also deletes the files 444 func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) { 445 for fnum, f := range t.files { 446 if fnum > num { 447 delete(t.files, fnum) 448 f.Close() 449 if remove { 450 os.Remove(f.Name()) 451 } 452 } 453 } 454 } 455 456 // Append injects a binary blob at the end of the freezer table. The item number 457 // is a precautionary parameter to ensure data correctness, but the table will 458 // reject already existing data. 459 // 460 // Note, this method will *not* flush any data to disk so be sure to explicitly 461 // fsync before irreversibly deleting data from the database. 462 func (t *freezerTable) Append(item uint64, blob []byte) error { 463 // Read lock prevents competition with truncate 464 t.lock.RLock() 465 // Ensure the table is still accessible 466 if t.index == nil || t.head == nil { 467 t.lock.RUnlock() 468 return errClosed 469 } 470 // Ensure only the next item can be written, nothing else 471 if atomic.LoadUint64(&t.items) != item { 472 t.lock.RUnlock() 473 return fmt.Errorf("appending unexpected item: want %d, have %d", t.items, item) 474 } 475 // Encode the blob and write it into the data file 476 if !t.noCompression { 477 blob = snappy.Encode(nil, blob) 478 } 479 bLen := uint32(len(blob)) 480 if t.headBytes+bLen < bLen || 481 t.headBytes+bLen > t.maxFileSize { 482 // we need a new file, writing would overflow 483 t.lock.RUnlock() 484 t.lock.Lock() 485 nextID := atomic.LoadUint32(&t.headId) + 1 486 // We open the next file in truncated mode -- if this file already 487 // exists, we need to start over from scratch on it 488 newHead, err := t.openFile(nextID, openFreezerFileTruncated) 489 if err != nil { 490 t.lock.Unlock() 491 return err 492 } 493 // Close old file, and reopen in RDONLY mode 494 t.releaseFile(t.headId) 495 t.openFile(t.headId, openFreezerFileForReadOnly) 496 497 // Swap out the current head 498 t.head = newHead 499 atomic.StoreUint32(&t.headBytes, 0) 500 atomic.StoreUint32(&t.headId, nextID) 501 t.lock.Unlock() 502 t.lock.RLock() 503 } 504 505 defer t.lock.RUnlock() 506 if _, err := t.head.Write(blob); err != nil { 507 return err 508 } 509 newOffset := atomic.AddUint32(&t.headBytes, bLen) 510 idx := indexEntry{ 511 filenum: atomic.LoadUint32(&t.headId), 512 offset: newOffset, 513 } 514 // Write indexEntry 515 t.index.Write(idx.marshallBinary()) 516 517 t.writeMeter.Mark(int64(bLen + indexEntrySize)) 518 t.sizeGauge.Inc(int64(bLen + indexEntrySize)) 519 520 atomic.AddUint64(&t.items, 1) 521 return nil 522 } 523 524 // getBounds returns the indexes for the item 525 // returns start, end, filenumber and error 526 func (t *freezerTable) getBounds(item uint64) (uint32, uint32, uint32, error) { 527 buffer := make([]byte, indexEntrySize) 528 var startIdx, endIdx indexEntry 529 // Read second index 530 if _, err := t.index.ReadAt(buffer, int64((item+1)*indexEntrySize)); err != nil { 531 return 0, 0, 0, err 532 } 533 endIdx.unmarshalBinary(buffer) 534 // Read first index (unless it's the very first item) 535 if item != 0 { 536 if _, err := t.index.ReadAt(buffer, int64(item*indexEntrySize)); err != nil { 537 return 0, 0, 0, err 538 } 539 startIdx.unmarshalBinary(buffer) 540 } else { 541 // Special case if we're reading the first item in the freezer. We assume that 542 // the first item always start from zero(regarding the deletion, we 543 // only support deletion by files, so that the assumption is held). 544 // This means we can use the first item metadata to carry information about 545 // the 'global' offset, for the deletion-case 546 return 0, endIdx.offset, endIdx.filenum, nil 547 } 548 if startIdx.filenum != endIdx.filenum { 549 // If a piece of data 'crosses' a data-file, 550 // it's actually in one piece on the second data-file. 551 // We return a zero-indexEntry for the second file as start 552 return 0, endIdx.offset, endIdx.filenum, nil 553 } 554 return startIdx.offset, endIdx.offset, endIdx.filenum, nil 555 } 556 557 // Retrieve looks up the data offset of an item with the given number and retrieves 558 // the raw binary blob from the data file. 559 func (t *freezerTable) Retrieve(item uint64) ([]byte, error) { 560 t.lock.RLock() 561 // Ensure the table and the item is accessible 562 if t.index == nil || t.head == nil { 563 t.lock.RUnlock() 564 return nil, errClosed 565 } 566 if atomic.LoadUint64(&t.items) <= item { 567 t.lock.RUnlock() 568 return nil, errOutOfBounds 569 } 570 // Ensure the item was not deleted from the tail either 571 if uint64(t.itemOffset) > item { 572 t.lock.RUnlock() 573 return nil, errOutOfBounds 574 } 575 startOffset, endOffset, filenum, err := t.getBounds(item - uint64(t.itemOffset)) 576 if err != nil { 577 t.lock.RUnlock() 578 return nil, err 579 } 580 dataFile, exist := t.files[filenum] 581 if !exist { 582 t.lock.RUnlock() 583 return nil, fmt.Errorf("missing data file %d", filenum) 584 } 585 // Retrieve the data itself, decompress and return 586 blob := make([]byte, endOffset-startOffset) 587 if _, err := dataFile.ReadAt(blob, int64(startOffset)); err != nil { 588 t.lock.RUnlock() 589 return nil, err 590 } 591 t.lock.RUnlock() 592 t.readMeter.Mark(int64(len(blob) + 2*indexEntrySize)) 593 594 if t.noCompression { 595 return blob, nil 596 } 597 return snappy.Decode(nil, blob) 598 } 599 600 // has returns an indicator whether the specified number data 601 // exists in the freezer table. 602 func (t *freezerTable) has(number uint64) bool { 603 return atomic.LoadUint64(&t.items) > number 604 } 605 606 // size returns the total data size in the freezer table. 607 func (t *freezerTable) size() (uint64, error) { 608 t.lock.RLock() 609 defer t.lock.RUnlock() 610 611 return t.sizeNolock() 612 } 613 614 // sizeNolock returns the total data size in the freezer table without obtaining 615 // the mutex first. 616 func (t *freezerTable) sizeNolock() (uint64, error) { 617 stat, err := t.index.Stat() 618 if err != nil { 619 return 0, err 620 } 621 total := uint64(t.maxFileSize)*uint64(t.headId-t.tailId) + uint64(t.headBytes) + uint64(stat.Size()) 622 return total, nil 623 } 624 625 // Sync pushes any pending data from memory out to disk. This is an expensive 626 // operation, so use it with care. 627 func (t *freezerTable) Sync() error { 628 if err := t.index.Sync(); err != nil { 629 return err 630 } 631 return t.head.Sync() 632 } 633 634 // printIndex is a debug print utility function for testing 635 func (t *freezerTable) printIndex() { 636 buf := make([]byte, indexEntrySize) 637 638 fmt.Printf("|-----------------|\n") 639 fmt.Printf("| fileno | offset |\n") 640 fmt.Printf("|--------+--------|\n") 641 642 for i := uint64(0); ; i++ { 643 if _, err := t.index.ReadAt(buf, int64(i*indexEntrySize)); err != nil { 644 break 645 } 646 var entry indexEntry 647 entry.unmarshalBinary(buf) 648 fmt.Printf("| %03d | %03d | \n", entry.filenum, entry.offset) 649 if i > 100 { 650 fmt.Printf(" ... \n") 651 break 652 } 653 } 654 fmt.Printf("|-----------------|\n") 655 }