github.com/tacshi/go-ethereum@v0.0.0-20230616113857-84a434e20921/core/state/pruner/bloom.go (about)

     1  // Copyright 2021 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package pruner
    18  
    19  import (
    20  	"bufio"
    21  	"encoding/binary"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"os"
    26  
    27  	bloomfilter "github.com/holiman/bloomfilter/v2"
    28  	"github.com/tacshi/go-ethereum/common"
    29  	"github.com/tacshi/go-ethereum/core/rawdb"
    30  	"github.com/tacshi/go-ethereum/log"
    31  	"github.com/tacshi/go-ethereum/rlp"
    32  )
    33  
    34  // stateBloomHasher is a wrapper around a byte blob to satisfy the interface API
    35  // requirements of the bloom library used. It's used to convert a trie hash or
    36  // contract code hash into a 64 bit mini hash.
    37  type stateBloomHasher []byte
    38  
    39  func (f stateBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
    40  func (f stateBloomHasher) Sum(b []byte) []byte               { panic("not implemented") }
    41  func (f stateBloomHasher) Reset()                            { panic("not implemented") }
    42  func (f stateBloomHasher) BlockSize() int                    { panic("not implemented") }
    43  func (f stateBloomHasher) Size() int                         { return 8 }
    44  func (f stateBloomHasher) Sum64() uint64                     { return binary.BigEndian.Uint64(f) }
    45  
    46  // stateBloom is a bloom filter used during the state conversion (snapshot->state).
    47  // The keys of all generated entries are recorded here so that, in the pruning
    48  // stage, the entries belonging to the specific version can be spared from deletion.
    49  //
    50  // False positives are allowed here. A "false-positive" entry is one that
    51  // actually doesn't belong to the specific version but is still not deleted by the
    52  // pruning. The downside of allowing false positives is that we may leave some "dangling"
    53  // nodes on disk. But in practice it's very unlikely that a dangling node is a
    54  // state root, so in theory this pruned state shouldn't be visited anymore. Another
    55  // potential issue is for fast sync. If we do another fast sync upon the pruned
    56  // database, it's problematic, as it will stop the expansion during the syncing.
    57  // TODO address it @rjl493456442 @holiman @karalabe.
    58  //
    59  // After the entire state is generated, the bloom filter should be persisted to
    60  // disk. This indicates that the whole generation procedure is finished.
    61  type stateBloom struct {
    62  	bloom *bloomfilter.Filter // underlying filter: Put adds keys to it, Contain queries it
    63  }
    64  
    65  // newStateBloomWithSize creates a brand new state bloom for state generation.
    66  // The bloom filter will be created by the passing bloom filter size. According
    67  // to the https://hur.st/bloomfilter/?n=600000000&p=&m=2048MB&k=4, the parameters
    68  // are picked so that the false-positive rate for mainnet is low enough.
    69  func newStateBloomWithSize(size uint64) (*stateBloom, error) {
    70  	bloom, err := bloomfilter.New(size*1024*1024*8, 4)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  	log.Info("Initialized state bloom", "size", common.StorageSize(float64(bloom.M()/8)))
    75  	return &stateBloom{bloom: bloom}, nil
    76  }
    77  
    78  // NewStateBloomFromDisk loads the state bloom from the given file.
    79  // In this case the assumption is held the bloom filter is complete.
    80  func NewStateBloomFromDisk(filename string) (*stateBloom, []common.Hash, error) {
    81  	f, err := os.Open(filename)
    82  	if err != nil {
    83  		return nil, nil, err
    84  	}
    85  	defer f.Close()
    86  	r := bufio.NewReader(f)
    87  	version := []byte{0}
    88  	_, err = io.ReadFull(r, version)
    89  	if err != nil {
    90  		return nil, nil, err
    91  	}
    92  	if version[0] != 0 {
    93  		return nil, nil, fmt.Errorf("unknown state bloom filter version %v", version[0])
    94  	}
    95  	var roots []common.Hash
    96  	err = rlp.Decode(r, &roots)
    97  	if err != nil {
    98  		return nil, nil, err
    99  	}
   100  	bloom, _, err := bloomfilter.ReadFrom(r)
   101  	if err != nil {
   102  		return nil, nil, err
   103  	}
   104  	return &stateBloom{bloom: bloom}, roots, nil
   105  }
   106  
   107  // Commit flushes the bloom filter content into the disk and marks the bloom
   108  // as complete.
   109  func (bloom *stateBloom) Commit(filename, tempname string, roots []common.Hash) error {
   110  	f, err := os.OpenFile(tempname, os.O_RDWR|os.O_CREATE, 0666)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	_, err = f.Write([]byte{0}) // version
   115  	if err != nil {
   116  		return err
   117  	}
   118  	err = rlp.Encode(f, roots)
   119  	if err != nil {
   120  		return err
   121  	}
   122  	// Write the bloom out into a temporary file
   123  	_, err = bloom.bloom.WriteTo(f)
   124  	if err != nil {
   125  		return err
   126  	}
   127  	// Ensure the file is synced to disk
   128  	if err := f.Sync(); err != nil {
   129  		f.Close()
   130  		return err
   131  	}
   132  	f.Close()
   133  
   134  	// Move the temporary file into it's final location
   135  	return os.Rename(tempname, filename)
   136  }
   137  
   138  // Put implements the KeyValueWriter interface. But here only the key is needed.
   139  func (bloom *stateBloom) Put(key []byte, value []byte) error {
   140  	// If the key length is not 32bytes, ensure it's contract code
   141  	// entry with new scheme.
   142  	if len(key) != common.HashLength {
   143  		isCode, codeKey := rawdb.IsCodeKey(key)
   144  		if !isCode {
   145  			return errors.New("invalid entry")
   146  		}
   147  		bloom.bloom.Add(stateBloomHasher(codeKey))
   148  		return nil
   149  	}
   150  	bloom.bloom.Add(stateBloomHasher(key))
   151  	return nil
   152  }
   153  
   154  // Delete removes the key from the key-value data store.
   155  func (bloom *stateBloom) Delete(key []byte) error { panic("not supported") }
   156  
   157  // Contain is the wrapper of the underlying contains function which
   158  // reports whether the key is contained.
   159  // - If it says yes, the key may be contained
   160  // - If it says no, the key is definitely not contained.
   161  func (bloom *stateBloom) Contain(key []byte) (bool, error) {
   162  	return bloom.bloom.Contains(stateBloomHasher(key)), nil
   163  }
   164  
   165  func (bloom *stateBloom) FalsePosititveProbability() float64 {
   166  	return bloom.bloom.FalsePosititveProbability()
   167  }
   168  
   169  func (bloom *stateBloom) Size() uint64 {
   170  	return bloom.bloom.M()
   171  }