github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/history_index_block.go (about)

     1  // Copyright 2025 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package pathdb
    18  
    19  import (
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"math"
    24  	"sort"
    25  )
    26  
const (
	indexBlockDescSize   = 14        // The encoded size of an index block descriptor in bytes: max (8) + entries (2) + id (4)
	indexBlockEntriesCap = 4096      // The maximum number of entries that can be grouped in a single block
	indexBlockRestartLen = 256       // The number of entries covered by a single restart section of an index block
	historyIndexBatch    = 1_000_000 // The number of state history indexes constructed or deleted in a single batch
)
    33  
// indexBlockDesc is the descriptor of an index block. An index block contains
// the list of state mutation records associated with a specific state (either
// an account or a storage slot).
//
// The descriptor is serialized by encode into a fixed-size, big-endian layout
// of indexBlockDescSize bytes and restored by decode.
type indexBlockDesc struct {
	max     uint64 // The maximum state ID retained within the block
	entries uint16 // The number of state mutation records retained within the block
	id      uint32 // The id of the index block
}
    42  
    43  func newIndexBlockDesc(id uint32) *indexBlockDesc {
    44  	return &indexBlockDesc{id: id}
    45  }
    46  
    47  // empty indicates whether the block is empty with no element retained.
    48  func (d *indexBlockDesc) empty() bool {
    49  	return d.entries == 0
    50  }
    51  
    52  // full indicates whether the number of elements in the block exceeds the
    53  // preconfigured limit.
    54  func (d *indexBlockDesc) full() bool {
    55  	return d.entries >= indexBlockEntriesCap
    56  }
    57  
    58  // encode packs index block descriptor into byte stream.
    59  func (d *indexBlockDesc) encode() []byte {
    60  	var buf [indexBlockDescSize]byte
    61  	binary.BigEndian.PutUint64(buf[0:8], d.max)
    62  	binary.BigEndian.PutUint16(buf[8:10], d.entries)
    63  	binary.BigEndian.PutUint32(buf[10:14], d.id)
    64  	return buf[:]
    65  }
    66  
    67  // decode unpacks index block descriptor from byte stream.
    68  func (d *indexBlockDesc) decode(blob []byte) {
    69  	d.max = binary.BigEndian.Uint64(blob[:8])
    70  	d.entries = binary.BigEndian.Uint16(blob[8:10])
    71  	d.id = binary.BigEndian.Uint32(blob[10:14])
    72  }
    73  
    74  // parseIndexBlock parses the index block with the supplied byte stream.
    75  // The index block format can be illustrated as below:
    76  //
    77  //			+---->+------------------+
    78  //			|     |      Chunk1      |
    79  //			|     +------------------+
    80  //			|     |      ......      |
    81  //			| +-->+------------------+
    82  //			| |   |      ChunkN      |
    83  //			| |   +------------------+
    84  //			+-|---|     Restart1     |
    85  //			  |   |     Restart...   |   2N bytes
    86  //			  +---|     RestartN     |
    87  //			      +------------------+
    88  //			      |  Restart count   |   1 byte
    89  //			      +------------------+
    90  //
    91  //	  - Chunk list: A list of data chunks
    92  //	  - Restart list: A list of 2-byte pointers, each pointing to the start position of a chunk
    93  //	  - Restart count: The number of restarts in the block, stored at the end of the block (1 byte)
    94  //
    95  // Note: the pointer is encoded as a uint16, which is sufficient within a chunk.
    96  // A uint16 can cover offsets in the range [0, 65536), which is more than enough
    97  // to store 4096 integers.
    98  //
    99  // Each chunk begins with the full value of the first integer, followed by
   100  // subsequent integers representing the differences between the current value
   101  // and the preceding one. Integers are encoded with variable-size for best
   102  // storage efficiency. Each chunk can be illustrated as below.
   103  //
   104  //		  Restart ---> +----------------+
   105  //	                   |  Full integer  |
   106  //		               +----------------+
   107  //		               | Diff with prev |
   108  //		               +----------------+
   109  //		               |      ...       |
   110  //		               +----------------+
   111  //		               | Diff with prev |
   112  //		               +----------------+
   113  //
   114  // Empty index block is regarded as invalid.
   115  func parseIndexBlock(blob []byte) ([]uint16, []byte, error) {
   116  	if len(blob) < 1 {
   117  		return nil, nil, fmt.Errorf("corrupted index block, len: %d", len(blob))
   118  	}
   119  	restartLen := blob[len(blob)-1]
   120  	if restartLen == 0 {
   121  		return nil, nil, errors.New("corrupted index block, no restart")
   122  	}
   123  	tailLen := int(restartLen)*2 + 1
   124  	if len(blob) < tailLen {
   125  		return nil, nil, fmt.Errorf("truncated restarts, size: %d, restarts: %d", len(blob), restartLen)
   126  	}
   127  	restarts := make([]uint16, 0, restartLen)
   128  	for i := int(restartLen); i > 0; i-- {
   129  		restart := binary.BigEndian.Uint16(blob[len(blob)-1-2*i:])
   130  		restarts = append(restarts, restart)
   131  	}
   132  	// Validate that restart points are strictly ordered and within the valid
   133  	// data range.
   134  	var prev uint16
   135  	for i := 0; i < len(restarts); i++ {
   136  		if i != 0 {
   137  			if restarts[i] <= prev {
   138  				return nil, nil, fmt.Errorf("restart out of order, prev: %d, next: %d", prev, restarts[i])
   139  			}
   140  		}
   141  		if int(restarts[i]) >= len(blob)-tailLen {
   142  			return nil, nil, fmt.Errorf("invalid restart position, restart: %d, size: %d", restarts[i], len(blob)-tailLen)
   143  		}
   144  		prev = restarts[i]
   145  	}
   146  	return restarts, blob[:len(blob)-tailLen], nil
   147  }
   148  
// blockReader is the reader to access the elements within an index block. It
// operates on the two parts produced by parseIndexBlock.
type blockReader struct {
	restarts []uint16 // Offsets into data, each marking the start of a restart section
	data     []byte   // Encoded elements, without the trailing restart table and counter
}
   154  
   155  // newBlockReader constructs the block reader with the supplied block data.
   156  func newBlockReader(blob []byte) (*blockReader, error) {
   157  	restarts, data, err := parseIndexBlock(blob)
   158  	if err != nil {
   159  		return nil, err
   160  	}
   161  	return &blockReader{
   162  		restarts: restarts,
   163  		data:     data, // safe to own the slice
   164  	}, nil
   165  }
   166  
   167  // readGreaterThan locates the first element in the block that is greater than
   168  // the specified value. If no such element is found, MaxUint64 is returned.
   169  func (br *blockReader) readGreaterThan(id uint64) (uint64, error) {
   170  	var err error
   171  	index := sort.Search(len(br.restarts), func(i int) bool {
   172  		item, n := binary.Uvarint(br.data[br.restarts[i]:])
   173  		if n <= 0 {
   174  			err = fmt.Errorf("failed to decode item at restart %d", br.restarts[i])
   175  		}
   176  		return item > id
   177  	})
   178  	if err != nil {
   179  		return 0, err
   180  	}
   181  	if index == 0 {
   182  		item, _ := binary.Uvarint(br.data[br.restarts[0]:])
   183  		return item, nil
   184  	}
   185  	var (
   186  		start  int
   187  		limit  int
   188  		result uint64
   189  	)
   190  	if index == len(br.restarts) {
   191  		// The element being searched falls within the last restart section,
   192  		// there is no guarantee such element can be found.
   193  		start = int(br.restarts[len(br.restarts)-1])
   194  		limit = len(br.data)
   195  	} else {
   196  		// The element being searched falls within the non-last restart section,
   197  		// such element can be found for sure.
   198  		start = int(br.restarts[index-1])
   199  		limit = int(br.restarts[index])
   200  	}
   201  	pos := start
   202  	for pos < limit {
   203  		x, n := binary.Uvarint(br.data[pos:])
   204  		if pos == start {
   205  			result = x
   206  		} else {
   207  			result += x
   208  		}
   209  		if result > id {
   210  			return result, nil
   211  		}
   212  		pos += n
   213  	}
   214  	// The element which is greater than specified id is not found.
   215  	if index == len(br.restarts) {
   216  		return math.MaxUint64, nil
   217  	}
   218  	// The element which is the first one greater than the specified id
   219  	// is exactly the one located at the restart point.
   220  	item, _ := binary.Uvarint(br.data[br.restarts[index]:])
   221  	return item, nil
   222  }
   223  
// blockWriter is the writer for appending elements to, or popping elements
// from, a single index block. The encoded form of the block is produced by
// finish.
type blockWriter struct {
	desc     *indexBlockDesc // Descriptor of the block
	restarts []uint16        // Offsets into the data slice, marking the start of each section
	scratch  []byte          // Buffer used for encoding full integers or value differences
	data     []byte          // Aggregated encoded data slice
}
   230  
   231  func newBlockWriter(blob []byte, desc *indexBlockDesc) (*blockWriter, error) {
   232  	scratch := make([]byte, binary.MaxVarintLen64)
   233  	if len(blob) == 0 {
   234  		return &blockWriter{
   235  			desc:    desc,
   236  			scratch: scratch,
   237  			data:    make([]byte, 0, 1024),
   238  		}, nil
   239  	}
   240  	restarts, data, err := parseIndexBlock(blob)
   241  	if err != nil {
   242  		return nil, err
   243  	}
   244  	return &blockWriter{
   245  		desc:     desc,
   246  		restarts: restarts,
   247  		scratch:  scratch,
   248  		data:     data, // safe to own the slice
   249  	}, nil
   250  }
   251  
   252  // append adds a new element to the block. The new element must be greater than
   253  // the previous one. The provided ID is assumed to always be greater than 0.
   254  func (b *blockWriter) append(id uint64) error {
   255  	if id == 0 {
   256  		return errors.New("invalid zero id")
   257  	}
   258  	if id <= b.desc.max {
   259  		return fmt.Errorf("append element out of order, last: %d, this: %d", b.desc.max, id)
   260  	}
   261  	// Rotate the current restart section if it's full
   262  	if b.desc.entries%indexBlockRestartLen == 0 {
   263  		// Save the offset within the data slice as the restart point
   264  		// for the next section.
   265  		b.restarts = append(b.restarts, uint16(len(b.data)))
   266  
   267  		// The restart point item can either be encoded in variable
   268  		// size or fixed size. Although variable-size encoding is
   269  		// slightly slower (2ns per operation), it is still relatively
   270  		// fast, therefore, it's picked for better space efficiency.
   271  		//
   272  		// The first element in a restart range is encoded using its
   273  		// full value.
   274  		n := binary.PutUvarint(b.scratch[0:], id)
   275  		b.data = append(b.data, b.scratch[:n]...)
   276  	} else {
   277  		// The current section is not full, append the element.
   278  		// The element which is not the first one in the section
   279  		// is encoded using the value difference from the preceding
   280  		// element.
   281  		n := binary.PutUvarint(b.scratch[0:], id-b.desc.max)
   282  		b.data = append(b.data, b.scratch[:n]...)
   283  	}
   284  	b.desc.entries++
   285  
   286  	// The state history ID must be greater than 0.
   287  	//if b.desc.min == 0 {
   288  	//	b.desc.min = id
   289  	//}
   290  	b.desc.max = id
   291  	return nil
   292  }
   293  
   294  // scanSection traverses the specified section and terminates if fn returns true.
   295  func (b *blockWriter) scanSection(section int, fn func(uint64, int) bool) {
   296  	var (
   297  		value uint64
   298  		start = int(b.restarts[section])
   299  		pos   = start
   300  		limit int
   301  	)
   302  	if section == len(b.restarts)-1 {
   303  		limit = len(b.data)
   304  	} else {
   305  		limit = int(b.restarts[section+1])
   306  	}
   307  	for pos < limit {
   308  		x, n := binary.Uvarint(b.data[pos:])
   309  		if pos == start {
   310  			value = x
   311  		} else {
   312  			value += x
   313  		}
   314  		if fn(value, pos) {
   315  			return
   316  		}
   317  		pos += n
   318  	}
   319  }
   320  
   321  // sectionLast returns the last element in the specified section.
   322  func (b *blockWriter) sectionLast(section int) uint64 {
   323  	var n uint64
   324  	b.scanSection(section, func(v uint64, _ int) bool {
   325  		n = v
   326  		return false
   327  	})
   328  	return n
   329  }
   330  
   331  // sectionSearch looks up the specified value in the given section,
   332  // the position and the preceding value will be returned if found.
   333  func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uint64, pos int) {
   334  	b.scanSection(section, func(v uint64, p int) bool {
   335  		if n == v {
   336  			pos = p
   337  			found = true
   338  			return true // terminate iteration
   339  		}
   340  		prev = v
   341  		return false // continue iteration
   342  	})
   343  	return found, prev, pos
   344  }
   345  
   346  // pop removes the last element from the block. The assumption is held that block
   347  // writer must be non-empty.
   348  func (b *blockWriter) pop(id uint64) error {
   349  	if id == 0 {
   350  		return errors.New("invalid zero id")
   351  	}
   352  	if id != b.desc.max {
   353  		return fmt.Errorf("pop element out of order, last: %d, this: %d", b.desc.max, id)
   354  	}
   355  	// If there is only one entry left, the entire block should be reset
   356  	if b.desc.entries == 1 {
   357  		//b.desc.min = 0
   358  		b.desc.max = 0
   359  		b.desc.entries = 0
   360  		b.restarts = nil
   361  		b.data = b.data[:0]
   362  		return nil
   363  	}
   364  	// Pop the last restart section if the section becomes empty after removing
   365  	// one element.
   366  	if b.desc.entries%indexBlockRestartLen == 1 {
   367  		b.data = b.data[:b.restarts[len(b.restarts)-1]]
   368  		b.restarts = b.restarts[:len(b.restarts)-1]
   369  		b.desc.max = b.sectionLast(len(b.restarts) - 1)
   370  		b.desc.entries -= 1
   371  		return nil
   372  	}
   373  	// Look up the element preceding the one to be popped, in order to update
   374  	// the maximum element in the block.
   375  	found, prev, pos := b.sectionSearch(len(b.restarts)-1, id)
   376  	if !found {
   377  		return fmt.Errorf("pop element is not found, last: %d, this: %d", b.desc.max, id)
   378  	}
   379  	b.desc.max = prev
   380  	b.data = b.data[:pos]
   381  	b.desc.entries -= 1
   382  	return nil
   383  }
   384  
   385  func (b *blockWriter) empty() bool {
   386  	return b.desc.empty()
   387  }
   388  
   389  func (b *blockWriter) full() bool {
   390  	return b.desc.full()
   391  }
   392  
   393  // finish finalizes the index block encoding by appending the encoded restart points
   394  // and the restart counter to the end of the block.
   395  //
   396  // This function is safe to be called multiple times.
   397  func (b *blockWriter) finish() []byte {
   398  	var buf []byte
   399  	for _, number := range b.restarts {
   400  		binary.BigEndian.PutUint16(b.scratch[:2], number)
   401  		buf = append(buf, b.scratch[:2]...)
   402  	}
   403  	buf = append(buf, byte(len(b.restarts)))
   404  	return append(b.data, buf...)
   405  }