github.com/bcskill/bcschain/v3@v3.4.9-beta2/ethdb/file_segment.go (about) 1 package ethdb 2 3 import ( 4 "bufio" 5 "bytes" 6 "context" 7 "encoding/binary" 8 "encoding/hex" 9 "errors" 10 "fmt" 11 "io" 12 "io/ioutil" 13 "os" 14 "path/filepath" 15 16 "github.com/cespare/xxhash" 17 "github.com/edsrzf/mmap-go" 18 "github.com/bcskill/bcschain/v3/common" 19 "github.com/bcskill/bcschain/v3/log" 20 ) 21 22 var ( 23 ErrImmutableSegment = errors.New("ethdb: immutable segment") 24 ErrSegmentTypeUnknown = errors.New("ethdb: segment type unknown") 25 ErrFileSegmentChecksumMismatch = errors.New("ethdb: file segment checksum mismatch") 26 ) 27 28 const ( 29 // FileSegmentMagic is the magic number at the beginning of the file segment. 30 FileSegmentMagic = "ETH1" 31 32 // FileSegmentChecksumSize is the size of the checksum, in bytes. 33 FileSegmentChecksumSize = 8 34 35 // FileSegmentIndexOffsetSize is the size of the index offset, in bytes. 36 FileSegmentIndexOffsetSize = 8 37 38 // FileSegmentIndexCountSize is the size of the index element count, in bytes. 39 FileSegmentIndexCountSize = 8 40 41 // FileSegmentIndexCapacitySize is the size of the index capacity, in bytes. 42 FileSegmentIndexCapacitySize = 8 43 44 // FileSegmentHeaderSize is the total size of the fixed length FileSegment header. 45 FileSegmentHeaderSize = 0 + 46 len(FileSegmentMagic) + 47 FileSegmentChecksumSize + 48 FileSegmentIndexOffsetSize + 49 FileSegmentIndexCountSize + 50 FileSegmentIndexCapacitySize 51 ) 52 53 // Ensure implementation implements interface. 54 var _ Segment = (*FileSegment)(nil) 55 56 // FileSegment represents an immutable key/value file segment for a table. 57 type FileSegment struct { 58 name string // segment name 59 path string // on-disk path 60 data []byte // memory-mapped data 61 file *os.File // file backing data 62 } 63 64 // NewFileSegment returns a new instance of FileSegment. 65 func NewFileSegment(name, path string) *FileSegment { 66 return &FileSegment{ 67 name: name, 68 path: path, 69 } 70 } 71 72 // Open opens and initializes the file segment. 73 func (s *FileSegment) Open() error { 74 file, err := os.Open(s.path) 75 if err != nil { 76 log.Error("Cannot open file segment", "path", s.path, "err", err) 77 return err 78 } 79 s.file = file 80 81 // Memory-map data. 82 data, err := mmap.Map(file, mmap.RDONLY, 0) 83 if err != nil { 84 log.Error("Cannot mmap file segment", "path", s.path, "err", err) 85 file.Close() 86 return err 87 } 88 s.data = []byte(data) 89 90 // Ensure header information is valid. 91 if len(data) < FileSegmentHeaderSize { 92 s.Close() 93 return errors.New("ethdb: file header too short") 94 } else if string(data[:len(FileSegmentMagic)]) != FileSegmentMagic { 95 s.Close() 96 return errors.New("ethdb: invalid ethdb file") 97 } 98 return nil 99 } 100 101 // Close closes the file and its mmap. 102 func (s *FileSegment) Close() (err error) { 103 if s.data != nil { 104 err = (*mmap.MMap)(&s.data).Unmap() 105 s.data = nil 106 } 107 if s.file != nil { 108 if ferr := s.file.Close(); ferr != nil && err == nil { 109 err = ferr 110 } 111 s.file = nil 112 } 113 return 114 } 115 116 // Name returns the name of the segment. 117 func (s *FileSegment) Name() string { return s.name } 118 119 // Path returns the path of the segment. 120 func (s *FileSegment) Path() string { return s.path } 121 122 // Size returns the size of the underlying data file. 123 func (s *FileSegment) Size() int { 124 return len(s.data) 125 } 126 127 // Data returns the underlying mmap data. 128 func (s *FileSegment) Data() []byte { 129 return s.data 130 } 131 132 // Checksum returns the checksum written to the segment file. 133 func (s *FileSegment) Checksum() []byte { 134 if len(s.data) < len(FileSegmentMagic)+FileSegmentChecksumSize { 135 return nil 136 } 137 return s.data[4:12] 138 } 139 140 // Len returns the number of keys in the file. 141 func (s *FileSegment) Len() int { 142 if s.data == nil { 143 return 0 144 } 145 data := s.data[len(FileSegmentMagic)+FileSegmentChecksumSize+FileSegmentIndexOffsetSize:] 146 return int(binary.BigEndian.Uint64(data[:FileSegmentIndexCountSize])) 147 } 148 149 // index returns the byte slice containing the index. 150 func (s *FileSegment) Index() []byte { 151 if s.data == nil { 152 return nil 153 } 154 return s.data[s.IndexOffset():] 155 } 156 157 // indexOffset returns the file offset where the index starts. 158 func (s *FileSegment) IndexOffset() int64 { 159 if s.data == nil { 160 return -1 161 } 162 return int64(binary.BigEndian.Uint64(s.data[len(FileSegmentMagic)+FileSegmentChecksumSize:])) 163 } 164 165 // capacity returns the capacity of the index. 166 func (s *FileSegment) Cap() int { 167 if s.data == nil { 168 return 0 169 } 170 data := s.data[len(FileSegmentMagic)+FileSegmentChecksumSize+FileSegmentIndexOffsetSize+FileSegmentIndexCountSize:] 171 return int(binary.BigEndian.Uint64(data[:FileSegmentIndexCapacitySize])) 172 } 173 174 // Has returns true if the key exists. 175 func (s *FileSegment) Has(key []byte) (bool, error) { 176 koff, _ := s.offset(key) 177 return koff != 0, nil 178 } 179 180 // Get returns the value of the given key. 181 func (s *FileSegment) Get(key []byte) ([]byte, error) { 182 defer func() { 183 if r := recover(); r != nil { 184 log.Error("Cannot read key in file segment", "path", s.path, "key", fmt.Sprintf("%x", key)) 185 panic(r) 186 } 187 }() 188 189 _, voff := s.offset(key) 190 if voff == 0 { 191 return nil, common.ErrNotFound 192 } 193 194 // Read value. 195 data := s.data[voff:] 196 n, sz := binary.Uvarint(data) 197 return common.CopyBytes(data[sz : sz+int(n) : sz+int(n)]), nil 198 } 199 200 // Iterator returns an iterator for iterating over all key/value pairs. 201 func (s *FileSegment) Iterator() SegmentIterator { 202 return &FileSegmentIterator{ 203 data: s.data[:s.IndexOffset()], 204 offset: int64(FileSegmentHeaderSize), 205 } 206 } 207 208 // offset returns the offset of key & value. Returns 0 if key does not exist. 209 func (s *FileSegment) offset(key []byte) (koff, voff int64) { 210 capacity := uint64(s.Cap()) 211 if capacity == 0 { 212 return 0, 0 213 } 214 mask := capacity - 1 215 216 idx := s.Index() 217 hash := hashKey(key) 218 pos := hash & mask 219 220 for d := uint64(0); ; d++ { 221 // Exit if empty slot found. 222 offset := int64(binary.BigEndian.Uint64(idx[pos*8:])) 223 if offset == 0 { 224 return 0, 0 225 } 226 227 // Read current key & compute hash. 228 data := s.data[offset:] 229 n, sz := binary.Uvarint(data) 230 curr := data[sz : sz+int(n)] 231 currHash := hashKey(curr) 232 233 // Exit if distance exceeds current slot or key matches. 234 if d > dist(currHash, pos, capacity, mask) { 235 return 0, 0 236 } else if currHash == hash && bytes.Equal(curr, key) { 237 return offset, offset + int64(sz) + int64(n) 238 } 239 pos = (pos + 1) & mask 240 } 241 } 242 243 // Ensure implementation implements interface. 244 var _ SegmentIterator = (*FileSegmentIterator)(nil) 245 246 // FileSegmentIterator returns an error for sequentially iterating over a 247 // FileSegment's key/value pairs. 248 type FileSegmentIterator struct { 249 data []byte 250 offset int64 251 252 key []byte 253 value []byte 254 } 255 256 // Close releases the iterator. 257 func (itr *FileSegmentIterator) Close() error { 258 itr.data, itr.offset = nil, 0 259 itr.key, itr.value = nil, nil 260 return nil 261 } 262 263 // Key returns the current key. Must be called after Next(). 264 func (itr *FileSegmentIterator) Key() []byte { return itr.key } 265 266 // Value returns the current key. Must be called after Next(). 267 func (itr *FileSegmentIterator) Value() []byte { return itr.value } 268 269 // Next reads the next key/value pair into the buffer. 270 func (itr *FileSegmentIterator) Next() bool { 271 if itr.offset >= int64(len(itr.data)) { 272 return false 273 } 274 275 // Read key. 276 n, sz := binary.Uvarint(itr.data[itr.offset:]) 277 itr.key = itr.data[itr.offset+int64(sz) : itr.offset+int64(sz+int(n))] 278 itr.offset += int64(sz + int(n)) 279 280 // Read value. 281 n, sz = binary.Uvarint(itr.data[itr.offset:]) 282 itr.value = itr.data[itr.offset+int64(sz) : itr.offset+int64(sz+int(n))] 283 itr.offset += int64(sz + int(n)) 284 285 return true 286 } 287 288 // FileSegmentOpener initializes and opens segments. 289 type FileSegmentOpener struct{} 290 291 // NewFileSegmentOpener returns a new instance of FileSegmentOpener. 292 func NewFileSegmentOpener() *FileSegmentOpener { 293 return &FileSegmentOpener{} 294 } 295 296 // ListSegmentNames returns a list of all segment names for a table. 297 func (o *FileSegmentOpener) ListSegmentNames(path, table string) ([]string, error) { 298 fis, err := ioutil.ReadDir(path) 299 if err != nil { 300 log.Error("Cannot list file segments", "path", path, "table", table, "err", err) 301 return nil, err 302 } 303 304 var keys []string 305 for _, fi := range fis { 306 if filepath.Ext(fi.Name()) != "" { 307 continue 308 } 309 keys = append(keys, fi.Name()) 310 } 311 return keys, nil 312 } 313 314 // OpenSegment returns an initialized and opened segment. 315 func (o *FileSegmentOpener) OpenSegment(table, name, path string) (Segment, error) { 316 // Determine the segment file type. 317 typ, err := SegmentFileType(path) 318 if err != nil { 319 return nil, err 320 } 321 322 switch typ { 323 case SegmentETH1: 324 segment := NewFileSegment(name, path) 325 if err := segment.Open(); err != nil { 326 return nil, err 327 } 328 return segment, nil 329 default: 330 return nil, ErrSegmentTypeUnknown 331 } 332 } 333 334 // FileSegmentCompactor locally compacts LDB segments into file segments. 335 type FileSegmentCompactor struct{} 336 337 // NewFileSegmentCompactor returns a new instance of FileSegmentCompactor. 338 func NewFileSegmentCompactor() *FileSegmentCompactor { 339 return &FileSegmentCompactor{} 340 } 341 342 // CompactSegment compacts an LDB segment into a file segment. 343 func (c *FileSegmentCompactor) CompactSegment(ctx context.Context, table string, s *LDBSegment) (Segment, error) { 344 tmpPath := s.Path() + ".tmp" 345 if err := c.CompactSegmentTo(ctx, s, tmpPath); err != nil { 346 return nil, err 347 } else if err := s.Close(); err != nil { 348 return nil, err 349 } else if err := c.RenameSegment(ctx, s.Path(), tmpPath); err != nil { 350 return nil, err 351 } 352 353 // Reopen as file segment. 354 newSegment := NewFileSegment(s.Name(), s.Path()) 355 if err := newSegment.Open(); err != nil { 356 return nil, err 357 } 358 return newSegment, nil 359 } 360 361 // CompactSegmentTo compacts an LDB segment to a specified path. 362 func (c *FileSegmentCompactor) CompactSegmentTo(ctx context.Context, s *LDBSegment, path string) error { 363 if err := s.CompactTo(path); err != nil { 364 os.Remove(path) 365 return err 366 } 367 return nil 368 } 369 370 // UncompactSegment converts an LDB segment back into a file segment. 371 func (c *FileSegmentCompactor) UncompactSegment(ctx context.Context, table string, s Segment) (*LDBSegment, error) { 372 tmpPath := s.Path() + ".tmp" 373 if err := c.UncompactSegmentTo(ctx, s, tmpPath); err != nil { 374 return nil, err 375 } else if err := s.Close(); err != nil { 376 return nil, err 377 } else if err := c.RenameSegment(ctx, s.Path(), tmpPath); err != nil { 378 return nil, err 379 } 380 381 // Reopen as LDB segment. 382 newLDBSegment := NewLDBSegment(s.Name(), s.Path()) 383 if err := newLDBSegment.Open(); err != nil { 384 return nil, err 385 } 386 return newLDBSegment, nil 387 } 388 389 // UncompactSegmentTo converts a segment back to an LDB segment at path. 390 func (c *FileSegmentCompactor) UncompactSegmentTo(ctx context.Context, s Segment, path string) error { 391 if err := UncompactSegmentTo(s, path); err != nil { 392 os.Remove(path) 393 return err 394 } 395 return nil 396 } 397 398 // RenameSegment removes dst and renames the new segment at path. 399 func (c *FileSegmentCompactor) RenameSegment(ctx context.Context, dst, src string) error { 400 if err := os.RemoveAll(dst); err != nil { 401 return err 402 } else if err := os.Rename(src, dst); err != nil { 403 return err 404 } 405 return nil 406 } 407 408 // FileSegmentEncoder represents a encoder for building a ethdb.FileSegment. 409 type FileSegmentEncoder struct { 410 f *os.File 411 flushed bool 412 413 offset int64 414 offsets []int64 415 416 // Filename of file segment to encode. 417 Path string 418 } 419 420 func NewFileSegmentEncoder(path string) *FileSegmentEncoder { 421 return &FileSegmentEncoder{ 422 Path: path, 423 } 424 } 425 426 // Open opens and initializes the output file segment. 427 func (enc *FileSegmentEncoder) Open() (err error) { 428 if enc.f != nil { 429 return errors.New("ethdb: file already open") 430 } 431 if enc.f, err = os.OpenFile(enc.Path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666); err != nil { 432 return err 433 } 434 435 // Write magic & leave space for checksum & index offset. 436 if _, err := enc.f.Write([]byte(FileSegmentMagic)); err != nil { 437 enc.Close() 438 return err 439 } else if _, err := enc.f.Write(make([]byte, FileSegmentHeaderSize-len(FileSegmentMagic))); err != nil { 440 enc.Close() 441 return err 442 } 443 enc.offset = int64(FileSegmentHeaderSize) 444 445 return nil 446 } 447 448 // Close closes the file handle. File must be flushed before calling close. 449 func (enc *FileSegmentEncoder) Close() error { 450 if enc.f != nil { 451 if err := enc.f.Close(); err != nil { 452 return err 453 } 454 } 455 return nil 456 } 457 458 // Flush finalizes the file segment and appends a hashmap & trailer. 459 func (enc *FileSegmentEncoder) Flush() error { 460 if enc.flushed { 461 return errors.New("ethdb: file index already flushed") 462 } 463 enc.flushed = true 464 465 if err := enc.writeIndex(); err != nil { 466 return fmt.Errorf("ethdb: cannot write index: %s", err) 467 } else if err := enc.writeChecksum(); err != nil { 468 return fmt.Errorf("ethdb: cannot write checksum: %s", err) 469 } else if err := enc.f.Sync(); err != nil { 470 return err 471 } 472 return nil 473 } 474 475 // EncodeKeyValue writes framed key & value byte slices to the file and records their offset. 476 func (enc *FileSegmentEncoder) EncodeKeyValue(key, value []byte) error { 477 buf := make([]byte, binary.MaxVarintLen64) 478 offset := enc.offset 479 480 // Write key len + data. 481 n := binary.PutUvarint(buf, uint64(len(key))) 482 if err := enc.write(buf[:n]); err != nil { 483 return err 484 } else if err := enc.write(key); err != nil { 485 return err 486 } 487 488 // Write value len + data. 489 n = binary.PutUvarint(buf, uint64(len(value))) 490 if err := enc.write(buf[:n]); err != nil { 491 return err 492 } else if err := enc.write(value); err != nil { 493 return err 494 } 495 496 enc.offsets = append(enc.offsets, offset) 497 return nil 498 } 499 500 func (enc *FileSegmentEncoder) write(b []byte) error { 501 n, err := enc.f.Write(b) 502 enc.offset += int64(n) 503 return err 504 } 505 506 func (enc *FileSegmentEncoder) writeIndex() error { 507 // Save offset to the start of the index. 508 indexOffset := enc.offset 509 510 // Open separate handler to reasd on-disk data. 511 f, err := os.Open(enc.Path) 512 if err != nil { 513 return err 514 } 515 defer f.Close() 516 517 // Build index in-memory. 518 idx := newFileSegmentEncoderIndex(f, len(enc.offsets)) 519 for _, offset := range enc.offsets { 520 if err := idx.insert(offset); err != nil { 521 return err 522 } 523 } 524 525 // Encode index to writer. 526 if _, err := idx.WriteTo(enc.f); err != nil { 527 return err 528 } 529 530 // Write length, capacity & index offset to the header. 531 hdr := make([]byte, FileSegmentIndexOffsetSize+FileSegmentIndexCountSize+FileSegmentIndexCapacitySize) 532 binary.BigEndian.PutUint64(hdr[0:8], uint64(indexOffset)) 533 binary.BigEndian.PutUint64(hdr[8:16], uint64(len(enc.offsets))) 534 binary.BigEndian.PutUint64(hdr[16:24], uint64(idx.capacity())) 535 if _, err := enc.f.Seek(int64(len(FileSegmentMagic)+FileSegmentChecksumSize), io.SeekStart); err != nil { 536 return err 537 } else if _, err := enc.f.Write(hdr); err != nil { 538 return err 539 } else if err := enc.f.Sync(); err != nil { 540 return err 541 } 542 return nil 543 } 544 545 func (enc *FileSegmentEncoder) writeChecksum() error { 546 buf, err := ChecksumFileSegment(enc.Path) 547 if err != nil { 548 return err 549 } 550 551 if _, err := enc.f.Seek(int64(len(FileSegmentMagic)), io.SeekStart); err != nil { 552 return err 553 } else if _, err := enc.f.Write(buf); err != nil { 554 return err 555 } else if err := enc.f.Sync(); err != nil { 556 return err 557 } 558 return nil 559 } 560 561 // ChecksumFileSegment calculates the checksum for the file segment at path. 562 func ChecksumFileSegment(path string) ([]byte, error) { 563 // Open handler to compute checksum. 564 f, err := os.Open(path) 565 if err != nil { 566 return nil, err 567 } 568 defer f.Close() 569 570 // Compute checksum for all data after checksum. 571 h := xxhash.New() 572 if _, err := f.Seek(int64(len(FileSegmentMagic)+FileSegmentChecksumSize), io.SeekStart); err != nil { 573 return nil, err 574 } else if _, err := io.Copy(h, f); err != nil { 575 return nil, err 576 } 577 578 buf := make([]byte, FileSegmentChecksumSize) 579 binary.BigEndian.PutUint64(buf, h.Sum64()) 580 581 return buf, nil 582 } 583 584 // VerifyFileSegment compares the calculated and stored checksum of the segment at path. 585 func VerifyFileSegment(path string) error { 586 computed, err := ChecksumFileSegment(path) 587 if err != nil { 588 return err 589 } 590 591 s := NewFileSegment(filepath.Base(path), path) 592 if err := s.Open(); err != nil { 593 return err 594 } 595 defer s.Close() 596 597 if !bytes.Equal(s.Checksum(), computed) { 598 return ErrFileSegmentChecksumMismatch 599 } 600 return nil 601 } 602 603 // fileSegmentEncoderIndex represents a fixed-length RHH-based hash map. 604 // The map does not support insertion of duplicate keys. 605 // 606 // https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf 607 type fileSegmentEncoderIndex struct { 608 src io.ReadSeeker 609 r *bufio.Reader 610 mask uint64 611 elems []int64 612 } 613 614 // newFileSegmentEncoderIndex returns a new instance of fileSegmentEncoderIndex. 615 func newFileSegmentEncoderIndex(src io.ReadSeeker, n int) *fileSegmentEncoderIndex { 616 idx := &fileSegmentEncoderIndex{ 617 src: src, 618 r: bufio.NewReader(src), 619 } 620 621 // Determine maximum capacity by padding length and finding next power of 2. 622 const loadFactor = 90 623 capacity := pow2(uint64((n * 100) / loadFactor)) 624 625 idx.elems = make([]int64, capacity) 626 idx.mask = uint64(capacity - 1) 627 628 return idx 629 } 630 631 // WriteTo writes the index to w. Implements io.WriterTo. 632 func (idx *fileSegmentEncoderIndex) WriteTo(w io.Writer) (n int64, err error) { 633 buf := make([]byte, 8) 634 for _, elem := range idx.elems { 635 binary.BigEndian.PutUint64(buf, uint64(elem)) 636 637 nn, err := w.Write(buf) 638 if n += int64(nn); err != nil { 639 return n, err 640 } 641 } 642 return n, nil 643 } 644 645 // capacity returns the computed capacity based on the initial count. 646 func (idx *fileSegmentEncoderIndex) capacity() int { 647 return len(idx.elems) 648 } 649 650 // insert writes the element at the given offset to the index. 651 func (idx *fileSegmentEncoderIndex) insert(offset int64) error { 652 key, err := idx.readAt(offset) 653 if err != nil { 654 return err 655 } 656 pos := hashKey(key) & idx.mask 657 capacity := uint64(len(idx.elems)) 658 659 var d uint64 660 for { 661 // Exit empty slot exists. 662 if idx.elems[pos] == 0 { 663 idx.elems[pos] = offset 664 return nil 665 } 666 667 // Read key at current position. 668 curr, err := idx.readAt(idx.elems[pos]) 669 if err != nil { 670 return err 671 } 672 673 // Return an error if a duplicate key exists. 674 if bytes.Equal(curr, key) { 675 return errors.New("ethdb: duplicate key written to file segment") 676 } 677 678 // Swap if current element has a lower probe distance. 679 tmp := dist(hashKey(curr), pos, capacity, idx.mask) 680 if tmp < d { 681 offset, idx.elems[pos], d = idx.elems[pos], offset, tmp 682 } 683 684 // Move position forward. 685 pos = (pos + 1) & idx.mask 686 d++ 687 } 688 } 689 690 func dist(hash, i, capacity, mask uint64) uint64 { 691 return ((i + capacity) - (hash & mask)) & mask 692 } 693 694 // readAt reads the key at the given offset. 695 func (idx *fileSegmentEncoderIndex) readAt(offset int64) ([]byte, error) { 696 idx.r.Reset(idx.src) 697 if _, err := idx.src.Seek(offset, io.SeekStart); err != nil { 698 return nil, err 699 } 700 701 // Read key length. 702 n, err := binary.ReadUvarint(idx.r) 703 if err != nil { 704 return nil, err 705 } 706 707 // Read key. 708 key := make([]byte, n) 709 if _, err := io.ReadFull(idx.r, key); err != nil { 710 return nil, err 711 } 712 return key, nil 713 } 714 715 func hashKey(key []byte) uint64 { 716 h := xxhash.Sum64(key) 717 if h == 0 { 718 h = 1 719 } 720 return h 721 } 722 723 func pow2(v uint64) uint64 { 724 for i := uint64(2); i < 1<<62; i *= 2 { 725 if i >= v { 726 return i 727 } 728 } 729 panic("unreachable") 730 } 731 732 func hexdump(b []byte) { os.Stderr.Write([]byte(hex.Dump(b))) }