code.vegaprotocol.io/vega@v0.79.0/core/snapshot/tree/tree.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package tree
    17  
    18  import (
    19  	"bytes"
    20  	"crypto/sha256"
    21  	"errors"
    22  	"fmt"
    23  
    24  	"code.vegaprotocol.io/vega/core/types"
    25  	"code.vegaprotocol.io/vega/libs/proto"
    26  	"code.vegaprotocol.io/vega/logging"
    27  	snapshotpb "code.vegaprotocol.io/vega/protos/vega/snapshot/v1"
    28  
    29  	tmtypes "github.com/cometbft/cometbft/abci/types"
    30  	"github.com/cosmos/iavl"
    31  )
    32  
// Tree is a high-level structure that abstracts the management of the AVL away
// from the snapshot engine. It ensures the snapshot and metadata databases are
// in-sync, and old snapshots are removed when exceeding the maximum number to
// keep configured.
// When created, it will automatically remove old local snapshots and load the
// ones left.
// When the tree is no longer used, the resources it holds must be released by
// calling Release().
type Tree struct {
	log *logging.Logger

	// blockHeightToStartFrom selects the snapshot to load at initialization;
	// when 0, the latest snapshot loaded by the AVL tree is kept.
	// maxNumberOfSnapshotsToKeep is the threshold above which the oldest
	// snapshots are deleted.
	blockHeightToStartFrom     uint64
	maxNumberOfSnapshotsToKeep uint64

	// metadataDB stores, per tree version, the snapshot in its Tendermint
	// format. snapshotDB is the database backing the inner AVL tree.
	metadataDB MetadataDatabase
	snapshotDB SnapshotsDatabase

	// innerTree is the underlying AVL tree holding the snapshot state.
	innerTree *iavl.MutableTree
}
    52  
    53  func (t *Tree) HasSnapshotsLoaded() bool {
    54  	return !t.metadataDB.IsEmpty()
    55  }
    56  
    57  func (t *Tree) Hash() []byte {
    58  	// Not returning the error as it is fairly unlikely to fail, and that makes
    59  	// theis API simpler to deal with.
    60  	hash, err := t.innerTree.Hash()
    61  	if err != nil {
    62  		t.log.Error("Could not computing the tree hash", logging.Error(err))
    63  	}
    64  
    65  	// When no tree has been saved, the underlying root from which the hash is
    66  	// computed is nil. When nil, a "default" hash is returned. In our case,
    67  	// if there is no saved tree, we want a nil hash to avoid misinterpretation.
    68  	if bytes.Equal(hash, sha256.New().Sum(nil)) {
    69  		return nil
    70  	}
    71  
    72  	return hash
    73  }
    74  
    75  func (t *Tree) WorkingHash() []byte {
    76  	hash, err := t.innerTree.WorkingHash()
    77  	if err != nil {
    78  		t.log.Error("Could not computing the working tree hash", logging.Error(err))
    79  	}
    80  
    81  	return hash
    82  }
    83  
    84  func (t *Tree) RemoveKey(key []byte) (bool, error) {
    85  	if ok, _ := t.innerTree.Has(key); ok {
    86  		_, removed, err := t.innerTree.Remove(key)
    87  		return removed, err
    88  	}
    89  	return false, nil
    90  }
    91  
    92  func (t *Tree) AddState(key []byte, state []byte) {
    93  	_, _ = t.innerTree.Set(key, state)
    94  }
    95  
    96  func (t *Tree) AsPayloads() ([]*types.Payload, error) {
    97  	lastSnapshotTree, err := t.innerTree.GetImmutable(t.innerTree.Version())
    98  	if err != nil {
    99  		return nil, fmt.Errorf("could not generate the immutable AVL tree: %w", err)
   100  	}
   101  
   102  	exporter, err := lastSnapshotTree.Export()
   103  	if err != nil {
   104  		return nil, fmt.Errorf("could not export the AVL tree: %w", err)
   105  	}
   106  	defer exporter.Close()
   107  
   108  	payloads := []*types.Payload{}
   109  
   110  	exportedNode, err := exporter.Next()
   111  	for err == nil {
   112  		// If there is no value, it means the node is an intermediary node and
   113  		// not a leaf. Only leaves hold the data we are looking for.
   114  		if exportedNode.Value == nil {
   115  			exportedNode, err = exporter.Next()
   116  			continue
   117  		}
   118  
   119  		// sort out the payload for this node
   120  		payloadProto := &snapshotpb.Payload{}
   121  		if perr := proto.Unmarshal(exportedNode.Value, payloadProto); perr != nil {
   122  			return nil, perr
   123  		}
   124  
   125  		payloads = append(payloads, types.PayloadFromProto(payloadProto))
   126  
   127  		exportedNode, err = exporter.Next()
   128  	}
   129  
   130  	if !errors.Is(err, iavl.ErrorExportDone) {
   131  		return nil, fmt.Errorf("failed to export AVL tree: %w", err)
   132  	}
   133  
   134  	return payloads, nil
   135  }
   136  
   137  func (t *Tree) AsProtoPayloads() ([]*snapshotpb.Payload, error) {
   138  	lastSnapshotTree, err := t.innerTree.GetImmutable(t.innerTree.Version())
   139  	if err != nil {
   140  		return nil, fmt.Errorf("could not generate the immutable AVL tree: %w", err)
   141  	}
   142  
   143  	exporter, err := lastSnapshotTree.Export()
   144  	if err != nil {
   145  		return nil, fmt.Errorf("could not export the AVL tree: %w", err)
   146  	}
   147  	defer exporter.Close()
   148  
   149  	payloads := []*snapshotpb.Payload{}
   150  
   151  	exportedNode, err := exporter.Next()
   152  	for err == nil {
   153  		// If there is no value, it means the node is an intermediary node and
   154  		// not a leaf. Only leaves hold the data we are looking for.
   155  		if exportedNode.Value == nil {
   156  			exportedNode, err = exporter.Next()
   157  			continue
   158  		}
   159  
   160  		// sort out the payload for this node
   161  		payloadProto := &snapshotpb.Payload{}
   162  		if perr := proto.Unmarshal(exportedNode.Value, payloadProto); perr != nil {
   163  			return nil, perr
   164  		}
   165  
   166  		payloads = append(payloads, payloadProto)
   167  
   168  		exportedNode, err = exporter.Next()
   169  	}
   170  
   171  	if !errors.Is(err, iavl.ErrorExportDone) {
   172  		return nil, fmt.Errorf("failed to export AVL tree: %w", err)
   173  	}
   174  
   175  	return payloads, nil
   176  }
   177  
   178  func (t *Tree) FindImmutableTreeByHeight(blockHeight uint64) (*iavl.ImmutableTree, error) {
   179  	version, err := t.metadataDB.FindVersionByBlockHeight(blockHeight)
   180  	if err != nil {
   181  		return nil, fmt.Errorf("an error occurred while looking for snapshot version: %w", err)
   182  	}
   183  	if version == -1 {
   184  		return nil, fmt.Errorf("no snapshot found for block height %d", blockHeight)
   185  	}
   186  
   187  	return t.innerTree.GetImmutable(version)
   188  }
   189  
   190  func (t *Tree) ListLatestSnapshots(maxLengthOfSnapshotList uint64) ([]*tmtypes.Snapshot, error) {
   191  	availableVersions := t.innerTree.AvailableVersions()
   192  	numberOfAvailableVersions := len(availableVersions)
   193  
   194  	listLength := maxLengthOfSnapshotList
   195  	fromIndex := numberOfAvailableVersions - int(maxLengthOfSnapshotList) - 1
   196  
   197  	// If negative, it means there is less versions than the maximum allowed, so
   198  	// we start from 0.
   199  	if fromIndex < 0 {
   200  		fromIndex = 0
   201  		listLength = uint64(numberOfAvailableVersions)
   202  	}
   203  
   204  	snapshotList := make([]*tmtypes.Snapshot, 0, listLength)
   205  
   206  	snapshotCount := 0
   207  	for i := fromIndex; i < numberOfAvailableVersions; i++ {
   208  		version := int64(availableVersions[i])
   209  
   210  		loadedSnapshot, err := t.metadataDB.Load(version)
   211  		if err != nil {
   212  			t.log.Error("could not load snapshot from the metadata database",
   213  				logging.Int64("version", version),
   214  				logging.Error(err),
   215  			)
   216  			// We ignore broken snapshot state.
   217  			continue
   218  		}
   219  		snapshotList = append(snapshotList, loadedSnapshot)
   220  		snapshotCount++
   221  	}
   222  
   223  	return snapshotList[0:snapshotCount], nil
   224  }
   225  
   226  func (t *Tree) AddSnapshot(s *types.Snapshot) error {
   227  	importer, err := t.innerTree.Import(s.Meta.Version)
   228  	if err != nil {
   229  		return fmt.Errorf("could not initialize the AVL tree importer: %w", err)
   230  	}
   231  	defer importer.Close()
   232  
   233  	// Convert slice into map for quick lookup.
   234  	payloads := map[string]*types.Payload{}
   235  	for _, pl := range s.Nodes {
   236  		payloads[pl.TreeKey()] = pl
   237  	}
   238  
   239  	for _, n := range s.Meta.NodeHashes {
   240  		var value []byte
   241  		if n.IsLeaf {
   242  			payload, ok := payloads[n.Key]
   243  			if !ok {
   244  				return fmt.Errorf("the payload for key %q is missing from the snapshot", n.Key)
   245  			}
   246  			value, err = proto.Marshal(payload.IntoProto())
   247  			if err != nil {
   248  				return fmt.Errorf("could not serialize the payload: %w", err)
   249  			}
   250  		} else {
   251  			// The importer interprets any empty-non-nil slice as an actual empty
   252  			// node. To ensure the importer correctly interprets it as "no value",
   253  			// It has to be nil.
   254  			// This has been made explicit for future reference.
   255  			value = nil
   256  		}
   257  
   258  		exportedNode := &iavl.ExportNode{
   259  			Key:   []byte(n.Key),
   260  			Value: value,
   261  			// This is the height of the node in the tree.
   262  			Height: int8(n.Height),
   263  			// This is the version of the node in the tree. It is incremented if
   264  			// that node's value is updated.
   265  			Version: n.Version,
   266  		}
   267  
   268  		if err := importer.Add(exportedNode); err != nil {
   269  			return fmt.Errorf("could not import tree node: %w", err)
   270  		}
   271  	}
   272  
   273  	if err := importer.Commit(); err != nil {
   274  		return fmt.Errorf("could not finalize the snapshot import into the tree: %w", err)
   275  	}
   276  
   277  	snapshotAsTendermintFormat, err := s.ToTM()
   278  	if err != nil {
   279  		return fmt.Errorf("could not serialize the snapshot as Tendermint proto: %w", err)
   280  	}
   281  
   282  	if err := t.metadataDB.Save(t.innerTree.Version(), snapshotAsTendermintFormat); err != nil {
   283  		return fmt.Errorf("could not save the snapshot metadata: %w", err)
   284  	}
   285  
   286  	return nil
   287  }
   288  
   289  func (t *Tree) Release() {
   290  	if t.snapshotDB != nil {
   291  		if err := t.snapshotDB.Close(); err != nil {
   292  			t.log.Error("could not cleanly close the snapshot database", logging.Error(err))
   293  		}
   294  	}
   295  
   296  	if t.metadataDB != nil {
   297  		if err := t.metadataDB.Close(); err != nil {
   298  			t.log.Error("could not cleanly close the metadata database", logging.Error(err))
   299  		}
   300  	}
   301  }
   302  
   303  func (t *Tree) initializeFromLocalStore() error {
   304  	// This initialises the AVL tree based on the content of the database.
   305  	// It's required to know the available versions, so we can perform look up
   306  	// and clean up.
   307  	// As a side effect, it will load the latest snapshot in the tree.
   308  	if _, err := t.innerTree.Load(); err != nil {
   309  		return fmt.Errorf("could not load local snapshots into the AVL tree: %w", err)
   310  	}
   311  
   312  	if err := t.removeOldSnapshots(); err != nil {
   313  		return err
   314  	}
   315  
   316  	// Load the snapshot matching the specified block height. The specified block height
   317  	// has to be a perfect match of the snapshot's block height, otherwise it fails.
   318  	// If the block height is set to 0, it uses the latest snapshot, that has
   319  	// already been loaded when calling (*MutableTree) Load(), at the
   320  	// beginning of the method.
   321  	if t.blockHeightToStartFrom > 0 {
   322  		if err := t.loadTreeAtBlockHeight(t.blockHeightToStartFrom); err != nil {
   323  			return err
   324  		}
   325  	}
   326  	t.log.Info("Snapshot has been loaded", logging.Int64("version", t.innerTree.Version()))
   327  
   328  	return nil
   329  }
   330  
   331  func (t *Tree) removeOldSnapshots() error {
   332  	maxNumberOfSnapshotsToKeep := int(t.maxNumberOfSnapshotsToKeep)
   333  	availableVersions := t.innerTree.AvailableVersions()
   334  	currentNumberOfSnapshots := len(availableVersions)
   335  
   336  	if currentNumberOfSnapshots > maxNumberOfSnapshotsToKeep {
   337  		fromVersion := availableVersions[0]
   338  		// The version defined by variable `toVersion` is excluded from the deletion.
   339  		indexOfOldestVersionToKeep := currentNumberOfSnapshots - maxNumberOfSnapshotsToKeep
   340  		toVersion := availableVersions[indexOfOldestVersionToKeep]
   341  		if err := t.innerTree.DeleteVersionsRange(int64(fromVersion), int64(toVersion)); err != nil {
   342  			// Based on the method documentation, this would only happen in the
   343  			// presence of a programming error.
   344  			return fmt.Errorf("could not remove old snapshots: %w", err)
   345  		}
   346  
   347  		t.log.Info("Old snapshots deleted",
   348  			logging.Int("from-version", fromVersion),
   349  			logging.Int("to-version", toVersion),
   350  		)
   351  
   352  		if err := t.metadataDB.DeleteRange(int64(fromVersion), int64(toVersion)); err != nil {
   353  			return fmt.Errorf("could not remove old snapshots metadata: %w", err)
   354  		}
   355  
   356  		availableVersions = t.innerTree.AvailableVersions()
   357  		currentNumberOfSnapshots = len(availableVersions)
   358  	}
   359  
   360  	if currentNumberOfSnapshots == 1 {
   361  		t.log.Info("Single snapshot stored", logging.Int("version", availableVersions[0]))
   362  	} else {
   363  		t.log.Info("Multiple snapshots stored",
   364  			logging.Int("from-version", availableVersions[0]),
   365  			logging.Int("to-version", availableVersions[currentNumberOfSnapshots-1]),
   366  		)
   367  	}
   368  
   369  	return nil
   370  }
   371  
   372  func (t *Tree) loadTreeAtBlockHeight(startHeight uint64) error {
   373  	versionToLoad, err := t.metadataDB.FindVersionByBlockHeight(startHeight)
   374  	if err != nil {
   375  		return fmt.Errorf("an error occurred while looking for snapshot version: %w", err)
   376  	}
   377  	if versionToLoad == -1 {
   378  		return fmt.Errorf("no snapshot found for block height %d", startHeight)
   379  	}
   380  
   381  	// Since it may reload a version anterior to the latest one, it has to reset
   382  	// the tree as if `versionToLoad` was the latest known snapshot.
   383  	// This helps to keep a clean snapshot database, and to prevent the upcoming
   384  	// snapshots to step on these older snapshots, and cause mayhem.
   385  	// The mayhem would originate from the underlying AVL tree library that will
   386  	// save the upcoming snapshot as one that comes right after the loaded one,
   387  	// without accounting for the ones that already exist.
   388  	if _, err := t.innerTree.LoadVersionForOverwriting(versionToLoad); err != nil {
   389  		return fmt.Errorf("could not load snapshot with version %d in AVL tree: %w", versionToLoad, err)
   390  	}
   391  
   392  	return nil
   393  }
   394  
   395  func (t *Tree) SaveVersion() error {
   396  	_, _, err := t.innerTree.SaveVersion()
   397  	if err != nil {
   398  		return fmt.Errorf("could not save the working tree: %w", err)
   399  	}
   400  
   401  	availableVersion := t.innerTree.AvailableVersions()
   402  	numberOfAvailableVersion := uint64(len(availableVersion))
   403  
   404  	if numberOfAvailableVersion > t.maxNumberOfSnapshotsToKeep {
   405  		versionToDelete := int64(availableVersion[0])
   406  		if err := t.innerTree.DeleteVersion(versionToDelete); err != nil {
   407  			t.log.Error("Could not remove old snapshot ",
   408  				logging.Int64("version", versionToDelete),
   409  				logging.Error(err),
   410  			)
   411  		} else {
   412  			t.log.Info("Old snapshot deleted", logging.Int64("version", versionToDelete))
   413  		}
   414  
   415  		if err := t.metadataDB.Delete(versionToDelete); err != nil {
   416  			t.log.Error("Could not remove old snapshot metadata",
   417  				logging.Int64("version", versionToDelete),
   418  				logging.Error(err),
   419  			)
   420  		} else {
   421  			t.log.Info("Old snapshot metadata deleted", logging.Int64("version", versionToDelete))
   422  		}
   423  	}
   424  
   425  	immutableTree, err := t.innerTree.GetImmutable(t.innerTree.Version())
   426  	if err != nil {
   427  		return fmt.Errorf("could not generate immutable tree: %w", err)
   428  	}
   429  
   430  	snapshot, err := types.SnapshotFromTree(immutableTree)
   431  	if err != nil {
   432  		return fmt.Errorf("could not serialize the snapshot from tree: %w", err)
   433  	}
   434  
   435  	tendermintSnapshot, err := snapshot.ToTM()
   436  	if err != nil {
   437  		return fmt.Errorf("could not serialize the snapshot as Tendermint proto: %w", err)
   438  	}
   439  
   440  	if err := t.metadataDB.Save(t.innerTree.Version(), tendermintSnapshot); err != nil {
   441  		return fmt.Errorf("could not save the snapshot to metadata database: %w", err)
   442  	}
   443  
   444  	return nil
   445  }
   446  
   447  func New(log *logging.Logger, opts ...Options) (*Tree, error) {
   448  	tree := &Tree{
   449  		log: log,
   450  
   451  		maxNumberOfSnapshotsToKeep: 10,
   452  		blockHeightToStartFrom:     0,
   453  	}
   454  
   455  	for _, opt := range opts {
   456  		if err := opt(tree); err != nil {
   457  			// If it fails initialization, any allocated resources is released.
   458  			tree.Release()
   459  			return nil, err
   460  		}
   461  	}
   462  
   463  	if tree.snapshotDB == nil || tree.metadataDB == nil {
   464  		panic("the databases have not been initialize")
   465  	}
   466  
   467  	innerTree, err := iavl.NewMutableTree(tree.snapshotDB, 0, false)
   468  	if err != nil {
   469  		return nil, fmt.Errorf("could not initialize the AVL tree: %w", err)
   470  	}
   471  	tree.innerTree = innerTree
   472  
   473  	// TODO: At this point, we should ensure the state of the metadata database
   474  	//  is as consistent as possible with snapshots database. This will lower
   475  	//  the probability of errors.
   476  
   477  	if tree.metadataDB.IsEmpty() {
   478  		// There is no metadata, so we assume there is no snapshots to load from.
   479  		return tree, nil
   480  	}
   481  
   482  	if err := tree.initializeFromLocalStore(); err != nil {
   483  		return nil, fmt.Errorf("could not load local snapshots into the tree: %w", err)
   484  	}
   485  
   486  	return tree, nil
   487  }