github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/ledger/complete/wal/wal.go

package wal

import (
	"fmt"
	"sort"

	prometheusWAL "github.com/onflow/wal/wal"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/rs/zerolog"

	"github.com/onflow/flow-go/ledger"
	"github.com/onflow/flow-go/ledger/complete/mtrie"
	"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
	"github.com/onflow/flow-go/module"
)

const SegmentSize = 32 * 1024 * 1024 // 32 MB

type DiskWAL struct {
	wal            *prometheusWAL.WAL
	paused         bool
	forestCapacity int
	pathByteSize   int
	log            zerolog.Logger
	dir            string
}

// TODO use real logger and metrics, but that would require passing them to Trie storage
func NewDiskWAL(logger zerolog.Logger, reg prometheus.Registerer, metrics module.WALMetrics, dir string, forestCapacity int, pathByteSize int, segmentSize int) (*DiskWAL, error) {
	w, err := prometheusWAL.NewSize(logger, reg, dir, segmentSize, false)
	if err != nil {
		return nil, fmt.Errorf("could not create disk wal from dir %v, segmentSize %v: %w", dir, segmentSize, err)
	}
	return &DiskWAL{
		wal:            w,
		paused:         false,
		forestCapacity: forestCapacity,
		pathByteSize:   pathByteSize,
		log:            logger.With().Str("ledger_mod", "diskwal").Logger(),
		dir:            dir,
	}, nil
}
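
// The sketch below is not part of the original file; it illustrates the typical
// lifecycle of a DiskWAL: construct it, wait on Ready, and drain Done on shutdown.
// The directory, forest capacity, and path byte size are placeholder values chosen
// for illustration only.
func exampleOpenAndCloseWAL(logger zerolog.Logger, reg prometheus.Registerer, walMetrics module.WALMetrics) error {
	// Hypothetical sizing: 1000 cached tries, 32-byte paths, default segment size.
	diskWAL, err := NewDiskWAL(logger, reg, walMetrics, "/var/flow/wal", 1000, 32, SegmentSize)
	if err != nil {
		return fmt.Errorf("cannot create disk WAL: %w", err)
	}

	// Ready returns an already-closed channel, so this does not block.
	<-diskWAL.Ready()

	// Done closes the underlying write-ahead log files.
	defer func() { <-diskWAL.Done() }()

	return nil
}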

func (w *DiskWAL) PauseRecord() {
	w.paused = true
}

func (w *DiskWAL) UnpauseRecord() {
	w.paused = false
}

// RecordUpdate writes the trie update to the write-ahead log on disk.
// If recording is not paused, it returns the number of the write-ahead log segment
// the trie update was written to, and skipped is false.
// If recording is paused, the update is not written and skipped is true.
func (w *DiskWAL) RecordUpdate(update *ledger.TrieUpdate) (segmentNum int, skipped bool, err error) {
	if w.paused {
		return 0, true, nil
	}

	bytes := EncodeUpdate(update)

	locations, err := w.wal.Log(bytes)

	if err != nil {
		return 0, false, fmt.Errorf("error while recording update in LedgerWAL: %w", err)
	}
	if len(locations) != 1 {
		return 0, false, fmt.Errorf("error while recording update in LedgerWAL: got %d locations, expected 1 location", len(locations))
	}

	return locations[0].Segment, false, nil
}
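
// The sketch below is not part of the original file; it shows how a caller might
// interpret RecordUpdate's return values: skipped means recording is currently
// paused, otherwise segmentNum identifies the WAL segment holding the update.
func exampleRecordUpdate(w *DiskWAL, update *ledger.TrieUpdate) error {
	segmentNum, skipped, err := w.RecordUpdate(update)
	if err != nil {
		return fmt.Errorf("cannot record trie update: %w", err)
	}
	if skipped {
		// Recording is paused (see PauseRecord); the update was not persisted.
		return nil
	}
	w.log.Info().Int("segment", segmentNum).Msg("trie update written to WAL segment")
	return nil
}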

// RecordDelete writes a deletion record for the given trie root hash to the
// write-ahead log. If recording is paused, nothing is written.
func (w *DiskWAL) RecordDelete(rootHash ledger.RootHash) error {
	if w.paused {
		return nil
	}

	bytes := EncodeDelete(rootHash)

	_, err := w.wal.Log(bytes)

	if err != nil {
		return fmt.Errorf("error while recording delete in LedgerWAL: %w", err)
	}
	return nil
}

// ReplayOnForest replays the write-ahead log (and any usable checkpoint) into the
// given forest: checkpointed tries are added to the forest and logged updates are
// re-applied. Deletion records are ignored.
func (w *DiskWAL) ReplayOnForest(forest *mtrie.Forest) error {
	return w.Replay(
		func(tries []*trie.MTrie) error {
			err := forest.AddTries(tries)
			if err != nil {
				return fmt.Errorf("adding rebuilt tries to forest failed: %w", err)
			}
			return nil
		},
		func(update *ledger.TrieUpdate) error {
			_, err := forest.Update(update)
			return err
		},
		func(rootHash ledger.RootHash) error {
			return nil
		},
	)
}

// Segments returns the numbers of the first and last segment files of the write-ahead log.
func (w *DiskWAL) Segments() (first, last int, err error) {
	return prometheusWAL.Segments(w.wal.Dir())
}

// Replay replays the whole write-ahead log, preferring the most recent usable checkpoint,
// and calls the given callbacks for checkpointed tries, trie updates, and deletions.
func (w *DiskWAL) Replay(
	checkpointFn func(tries []*trie.MTrie) error,
	updateFn func(update *ledger.TrieUpdate) error,
	deleteFn func(ledger.RootHash) error,
) error {
	from, to, err := w.Segments()
	if err != nil {
		return fmt.Errorf("could not find segments: %w", err)
	}
	err = w.replay(from, to, checkpointFn, updateFn, deleteFn, true)
	if err != nil {
		return fmt.Errorf("could not replay segments [%v:%v]: %w", from, to, err)
	}
	return nil
}

// ReplayLogsOnly replays the write-ahead log segments without loading regular checkpoints;
// only the root checkpoint is considered, and only when replay starts at segment 0.
func (w *DiskWAL) ReplayLogsOnly(
	checkpointFn func(tries []*trie.MTrie) error,
	updateFn func(update *ledger.TrieUpdate) error,
	deleteFn func(rootHash ledger.RootHash) error,
) error {
	from, to, err := w.Segments()
	if err != nil {
		return fmt.Errorf("could not find segments: %w", err)
	}
	err = w.replay(from, to, checkpointFn, updateFn, deleteFn, false)
	if err != nil {
		return fmt.Errorf("could not replay WAL only for segments [%v:%v]: %w", from, to, err)
	}
	return nil
}
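
// The sketch below is not part of the original file; it demonstrates the
// callback-based replay API with custom functions that count checkpointed tries,
// updates, and deletions without mutating any forest. The same callback shapes
// apply to both Replay and ReplayLogsOnly.
func exampleCountWALRecords(w *DiskWAL) (tries int, updates int, deletes int, err error) {
	err = w.Replay(
		func(checkpointTries []*trie.MTrie) error {
			tries += len(checkpointTries)
			return nil
		},
		func(update *ledger.TrieUpdate) error {
			updates++
			return nil
		},
		func(rootHash ledger.RootHash) error {
			deletes++
			return nil
		},
	)
	return tries, updates, deletes, err
}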

// replay replays the write-ahead log segments in the range [from, to].
// If useCheckpoints is true, it first tries to load the most recent usable checkpoint
// and only replays the segments after it.
func (w *DiskWAL) replay(
	from, to int,
	checkpointFn func(tries []*trie.MTrie) error,
	updateFn func(update *ledger.TrieUpdate) error,
	deleteFn func(rootHash ledger.RootHash) error,
	useCheckpoints bool,
) error {

	w.log.Info().Msgf("loading checkpoint with WAL from %d to %d, useCheckpoints %v", from, to, useCheckpoints)

	if to < from {
		return fmt.Errorf("end of range cannot be smaller than beginning")
	}

	loadedCheckpoint := -1
	startSegment := from
	checkpointLoaded := false

	checkpointer, err := w.NewCheckpointer()
	if err != nil {
		return fmt.Errorf("cannot create checkpointer: %w", err)
	}

	if useCheckpoints {
		allCheckpoints, err := checkpointer.Checkpoints()
		if err != nil {
			return fmt.Errorf("cannot get list of checkpoints: %w", err)
		}

		var availableCheckpoints []int

		// if there are no checkpoints already, don't bother
		if len(allCheckpoints) > 0 {
			// from-1 accounts for checkpoints covering the segment just before the range,
			// i.e. checkpoint 8 is usable when replaying segments 9-12
			availableCheckpoints = getPossibleCheckpoints(allCheckpoints, from-1, to)
		}

		w.log.Info().Ints("checkpoints", availableCheckpoints).Msg("available checkpoints")

		for len(availableCheckpoints) > 0 {
			// as long as there are checkpoints to try, always try the latest checkpoint file first,
			// since it allows us to load fewer segments.
			latestCheckpoint := availableCheckpoints[len(availableCheckpoints)-1]

			w.log.Info().Int("checkpoint", latestCheckpoint).Msg("loading checkpoint")

			forestSequencing, err := checkpointer.LoadCheckpoint(latestCheckpoint)
			if err != nil {
				w.log.Warn().Int("checkpoint", latestCheckpoint).Err(err).
					Msg("checkpoint loading failed")

				availableCheckpoints = availableCheckpoints[:len(availableCheckpoints)-1]
				continue
			}

			if len(forestSequencing) == 0 {
				return fmt.Errorf("checkpoint loaded but has no trie")
			}

			firstTrie := forestSequencing[0].RootHash()
			lastTrie := forestSequencing[len(forestSequencing)-1].RootHash()
			w.log.Info().Int("checkpoint", latestCheckpoint).
				Hex("first_trie", firstTrie[:]).
				Hex("last_trie", lastTrie[:]).
				Msg("checkpoint loaded")

			err = checkpointFn(forestSequencing)
			if err != nil {
				return fmt.Errorf("error while handling checkpoint: %w", err)
			}
			loadedCheckpoint = latestCheckpoint
			checkpointLoaded = true
			break
		}

		if loadedCheckpoint != -1 && loadedCheckpoint == to {
			w.log.Info().Msgf("loaded checkpoint covers the whole requested range, no segments to replay")
			return nil
		}

		if loadedCheckpoint >= 0 {
			startSegment = loadedCheckpoint + 1
		}

		w.log.Info().
			Int("start_segment", startSegment).
			Msg("starting replay from checkpoint segment")
	}

	if loadedCheckpoint == -1 && startSegment == 0 {
		hasRootCheckpoint, err := checkpointer.HasRootCheckpoint()
		if err != nil {
			return fmt.Errorf("cannot check root checkpoint existence: %w", err)
		}
		if hasRootCheckpoint {
			w.log.Info().Msgf("loading root checkpoint")

			flattenedForest, err := checkpointer.LoadRootCheckpoint()
			if err != nil {
				return fmt.Errorf("cannot load root checkpoint: %w", err)
			}
			err = checkpointFn(flattenedForest)
			if err != nil {
				return fmt.Errorf("error while handling root checkpoint: %w", err)
			}

			rootHash := flattenedForest[len(flattenedForest)-1].RootHash()
			w.log.Info().
				Hex("root_hash", rootHash[:]).
				Msg("root checkpoint loaded")
			checkpointLoaded = true
		} else {
			w.log.Info().Msgf("no root checkpoint was found")
		}
	}

	w.log.Info().
		Bool("checkpoint_loaded", checkpointLoaded).
		Int("loaded_checkpoint", loadedCheckpoint).
		Msgf("replaying segments from %d to %d", startSegment, to)

	sr, err := prometheusWAL.NewSegmentsRangeReader(w.log, prometheusWAL.SegmentRange{
		Dir:   w.wal.Dir(),
		First: startSegment,
		Last:  to,
	})
	if err != nil {
		return fmt.Errorf("cannot create segment reader: %w", err)
	}

	reader := prometheusWAL.NewReader(sr)

	defer sr.Close()

	for reader.Next() {
		record := reader.Record()
		operation, rootHash, update, err := Decode(record)
		if err != nil {
			return fmt.Errorf("cannot decode LedgerWAL record: %w", err)
		}

		switch operation {
		case WALUpdate:
			err = updateFn(update)
			if err != nil {
				return fmt.Errorf("error while processing LedgerWAL update: %w", err)
			}
		case WALDelete:
			err = deleteFn(rootHash)
			if err != nil {
				return fmt.Errorf("error while processing LedgerWAL deletion: %w", err)
			}
		}

		err = reader.Err()
		if err != nil {
			return fmt.Errorf("cannot read LedgerWAL: %w", err)
		}
	}

	w.log.Info().Msgf("finished loading checkpoint and replaying WAL from %d to %d", from, to)

	return nil
}

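// getPossibleCheckpoints returns the checkpoints from the sorted list allCheckpoints
// that fall within [from, to], i.e. checkpoints usable when replaying segments in that
// range. As an illustrative example (not from the original file): allCheckpoints = [2, 5, 8]
// with from = 4 and to = 10 yields [5, 8].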
func getPossibleCheckpoints(allCheckpoints []int, from, to int) []int {
	// the list of checkpoints is sorted
	indexFrom := sort.SearchInts(allCheckpoints, from)
	indexTo := sort.SearchInts(allCheckpoints, to)

	// all checkpoints are earlier than `to`, so return every checkpoint at or after `from`
	if indexTo == len(allCheckpoints) {
		return allCheckpoints[indexFrom:indexTo]
	}

	// exact match on `to`, include it
	if allCheckpoints[indexTo] == to {
		return allCheckpoints[indexFrom : indexTo+1]
	}

	// indexTo == 0 means every checkpoint is later than `to`, so none is usable
	if indexTo == 0 {
		return nil
	}

	return allCheckpoints[indexFrom:indexTo]
}

// NewCheckpointer returns a Checkpointer for this WAL
func (w *DiskWAL) NewCheckpointer() (*Checkpointer, error) {
	return NewCheckpointer(w, w.pathByteSize, w.forestCapacity), nil
}

// Ready implements interface module.ReadyDoneAware.
// The DiskWAL is ready immediately after construction, so the returned channel is already closed.
func (w *DiskWAL) Ready() <-chan struct{} {
	ready := make(chan struct{})
	close(ready)
	return ready
}

// Done implements interface module.ReadyDoneAware.
// It closes all open write-ahead log files.
func (w *DiskWAL) Done() <-chan struct{} {
	err := w.wal.Close()
	if err != nil {
		w.log.Err(err).Msg("error while closing WAL")
	}
	done := make(chan struct{})
	close(done)
	return done
}

// LedgerWAL abstracts the ledger write-ahead log; DiskWAL is its on-disk implementation.
type LedgerWAL interface {
	module.ReadyDoneAware

	NewCheckpointer() (*Checkpointer, error)
	PauseRecord()
	UnpauseRecord()
	RecordUpdate(update *ledger.TrieUpdate) (int, bool, error)
	RecordDelete(rootHash ledger.RootHash) error
	ReplayOnForest(forest *mtrie.Forest) error
	Segments() (first, last int, err error)
	Replay(
		checkpointFn func(tries []*trie.MTrie) error,
		updateFn func(update *ledger.TrieUpdate) error,
		deleteFn func(ledger.RootHash) error,
	) error
	ReplayLogsOnly(
		checkpointFn func(tries []*trie.MTrie) error,
		updateFn func(update *ledger.TrieUpdate) error,
		deleteFn func(rootHash ledger.RootHash) error,
	) error
}
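
// The assertion below is not part of the original file; it is an illustrative
// compile-time check that *DiskWAL satisfies LedgerWAL, based on the methods
// defined above.
var _ LedgerWAL = (*DiskWAL)(nil)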