github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/ledger/complete/wal/checkpointer_test.go

package wal_test

import (
	"bytes"
	"fmt"
	"math"
	"math/rand"
	"os"
	"path"
	"testing"
	"time"

	"github.com/rs/zerolog"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"go.uber.org/atomic"

	"github.com/onflow/flow-go/ledger"
	"github.com/onflow/flow-go/ledger/common/pathfinder"
	"github.com/onflow/flow-go/ledger/common/testutils"
	"github.com/onflow/flow-go/ledger/complete"
	"github.com/onflow/flow-go/ledger/complete/mtrie"
	"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
	realWAL "github.com/onflow/flow-go/ledger/complete/wal"
	"github.com/onflow/flow-go/ledger/complete/wal/fixtures"
	"github.com/onflow/flow-go/module/metrics"
	"github.com/onflow/flow-go/utils/unittest"
)

const (
	numInsPerStep      = 2
	keyNumberOfParts   = 10
	keyPartMinByteSize = 1
	keyPartMaxByteSize = 100
	valueMaxByteSize   = 2 << 16 // 128 kB (2 << 16 = 131072 bytes)
	size               = 10
	segmentSize        = 32 * 1024
	pathByteSize       = 32
	pathFinderVersion  = uint8(complete.DefaultPathFinderVersion)
)

var (
	logger           = zerolog.Logger{}
	metricsCollector = &metrics.NoopCollector{}
)

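// Test_WAL writes a series of random updates to a ledger backed by a disk WAL,
// shuts the ledger down, and then rebuilds a second ledger from the same
// directory to verify that every written value can be read back after replay.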
func Test_WAL(t *testing.T) {

	unittest.RunWithTempDir(t, func(dir string) {

		const (
			checkpointDistance = math.MaxInt // A large number to prevent checkpoint creation.
			checkpointsToKeep  = 1
		)

		diskWal, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metricsCollector, dir, size, pathfinder.PathByteSize, realWAL.SegmentSize)
		require.NoError(t, err)

		led, err := complete.NewLedger(diskWal, size*10, metricsCollector, logger, complete.DefaultPathFinderVersion)
		require.NoError(t, err)

		compactor, err := complete.NewCompactor(led, diskWal, unittest.Logger(), size, checkpointDistance, checkpointsToKeep, atomic.NewBool(false), metrics.NewNoopCollector())
		require.NoError(t, err)

		<-compactor.Ready()
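		// Ready() returns a channel that is closed once the compactor has started,
		// following the module lifecycle convention used throughout flow-go.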

		var state = led.InitialState()

		// saved data after updates
		savedData := make(map[string]map[string]ledger.Value)

		// Each iteration below writes 2 values of between valueMaxByteSize/2 and
		// valueMaxByteSize bytes, so after `size` iterations the WAL spans
		// multiple segments.

		for i := 0; i < size; i++ {

			keys := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, keyPartMinByteSize, keyPartMaxByteSize)
			values := testutils.RandomValues(numInsPerStep, valueMaxByteSize/2, valueMaxByteSize)
			update, err := ledger.NewUpdate(state, keys, values)
			require.NoError(t, err)
			state, _, err = led.Set(update)
			require.NoError(t, err)

			data := make(map[string]ledger.Value, len(keys))
			for j, key := range keys {
				data[string(ledger.EncodeKey(&key))] = values[j]
			}

			savedData[string(state[:])] = data
		}

		<-led.Done()
		<-compactor.Done()

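		// Rebuild a second ledger from the same directory; its state must be
		// reconstructed purely by replaying the WAL segments written above.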
		diskWal2, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metricsCollector, dir, size, pathfinder.PathByteSize, realWAL.SegmentSize)
		require.NoError(t, err)
		led2, err := complete.NewLedger(diskWal2, (size*10)+10, metricsCollector, logger, complete.DefaultPathFinderVersion)
		require.NoError(t, err)
		compactor2 := fixtures.NewNoopCompactor(led2) // noop compactor is used because no write is needed.
		<-compactor2.Ready()

		// random map iteration order is a benefit here
		for state, data := range savedData {

			keys := make([]ledger.Key, 0, len(data))
			for keyString := range data {
				key, err := ledger.DecodeKey([]byte(keyString))
				require.NoError(t, err)
				keys = append(keys, *key)
			}

			var ledgerState ledger.State
			copy(ledgerState[:], state)
			query, err := ledger.NewQuery(ledgerState, keys)
			require.NoError(t, err)
			values, err := led2.Get(query)
			require.NoError(t, err)

			for i, key := range keys {
				assert.Equal(t, data[string(ledger.EncodeKey(&key))], values[i])
			}
		}

		<-led2.Done()
		<-compactor2.Done()
	})
}

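// Test_Checkpointing exercises the full WAL lifecycle: it records trie updates
// into WAL segments, replays them into a fresh forest, creates a checkpoint,
// reads the checkpoint back, applies further updates on top of it, and finally
// verifies that corrupted checkpoints are skipped in favour of older valid ones.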
func Test_Checkpointing(t *testing.T) {

	unittest.RunWithTempDir(t, func(dir string) {

		f, err := mtrie.NewForest(size*10, metricsCollector, nil)
		require.NoError(t, err)

		var rootHash = f.GetEmptyRootHash()

		// saved data after updates
		savedData := make(map[ledger.RootHash]map[ledger.Path]*ledger.Payload)

		t.Run("create WAL and initial trie", func(t *testing.T) {

			wal, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
			require.NoError(t, err)

			// WAL segments are segmentSize (32 kB) here, while each iteration records
			// 2 payloads of well over 64 kB each, so after `size` iterations we end up
			// with well over `size` segments.

			// Generate the tree and create WAL
			for i := 0; i < size; i++ {

				keys := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, 1600, 1600)
				values := testutils.RandomValues(numInsPerStep, valueMaxByteSize/2, valueMaxByteSize)
				update, err := ledger.NewUpdate(ledger.State(rootHash), keys, values)
				require.NoError(t, err)

				trieUpdate, err := pathfinder.UpdateToTrieUpdate(update, pathFinderVersion)
				require.NoError(t, err)

				_, _, err = wal.RecordUpdate(trieUpdate)
				require.NoError(t, err)

				rootHash, err := f.Update(trieUpdate)
				require.NoError(t, err)

				fmt.Printf("Updated with %x\n", rootHash)

				data := make(map[ledger.Path]*ledger.Payload, len(trieUpdate.Paths))
				for j, path := range trieUpdate.Paths {
					data[path] = trieUpdate.Payloads[j]
				}

				savedData[rootHash] = data
			}
			// some buffer time before shutting down the WAL
			time.Sleep(1 * time.Second)
			<-wal.Done()

			require.FileExists(t, path.Join(dir, "00000010")) // make sure we have enough segments saved
		})

		// create a new forest and replay WAL
		f2, err := mtrie.NewForest(size*10, metricsCollector, nil)
		require.NoError(t, err)

		t.Run("replay WAL and create checkpoint", func(t *testing.T) {

			require.NoFileExists(t, path.Join(dir, "checkpoint.00000010"))

			wal2, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
			require.NoError(t, err)

			err = wal2.Replay(
				func(tries []*trie.MTrie) error {
					return fmt.Errorf("unexpected checkpoint: no checkpoints should exist yet")
				},
				func(update *ledger.TrieUpdate) error {
					_, err := f2.Update(update)
					return err
				},
				func(rootHash ledger.RootHash) error {
					return fmt.Errorf("unexpected deletion: no deletions were recorded")
				},
			)
			require.NoError(t, err)

			checkpointer, err := wal2.NewCheckpointer()
			require.NoError(t, err)

			require.NoFileExists(t, path.Join(dir, "checkpoint.00000010"))

			err = checkpointer.Checkpoint(10)
			require.NoError(t, err)

			require.FileExists(t, path.Join(dir, "checkpoint.00000010")) // make sure the checkpoint file was created
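			// Checkpoint file names encode the last WAL segment they cover, zero-padded
			// to 8 digits, so checkpoint.00000010 captures the state up to and
			// including segment 00000010.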

			<-wal2.Done()
		})

		f3, err := mtrie.NewForest(size*10, metricsCollector, nil)
		require.NoError(t, err)

		t.Run("read checkpoint", func(t *testing.T) {
			wal3, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
			require.NoError(t, err)

			err = wal3.Replay(
				func(tries []*trie.MTrie) error {
					return f3.AddTries(tries)
				},
				func(update *ledger.TrieUpdate) error {
					return fmt.Errorf("unexpected update: the checkpoint should already cover all updates")
				},
				func(rootHash ledger.RootHash) error {
					return fmt.Errorf("unexpected deletion: no deletions were recorded")
				},
			)
			require.NoError(t, err)

			<-wal3.Done()
		})

		t.Run("all forests contain the same data", func(t *testing.T) {
			// random map iteration order is a benefit here
			// make sure the tries have been rebuilt, one from the WAL and another from the checkpoint:
			// f, f2 and f3 should be identical
			for rootHash, data := range savedData {

				paths := make([]ledger.Path, 0, len(data))
				for path := range data {
					paths = append(paths, path)
				}

				values1, err := f.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths})
				require.NoError(t, err)

				values2, err := f2.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths})
				require.NoError(t, err)

				values3, err := f3.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths})
				require.NoError(t, err)

				for i, path := range paths {
					require.Equal(t, data[path].Value(), values1[i])
					require.Equal(t, data[path].Value(), values2[i])
					require.Equal(t, data[path].Value(), values3[i])
				}
			}
		})

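		// keys2/values2 form one extra update that is recorded after the checkpoint;
		// they are reused by the remaining subtests below.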
		keys2 := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, keyPartMinByteSize, keyPartMaxByteSize)
		values2 := testutils.RandomValues(numInsPerStep, 1, valueMaxByteSize)
		t.Run("create segment after checkpoint", func(t *testing.T) {

			// require.NoFileExists(t, path.Join(dir, "00000011"))

			unittest.RequireFileEmpty(t, path.Join(dir, "00000011"))

			// generate one more segment
			wal4, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
			require.NoError(t, err)

			update, err := ledger.NewUpdate(ledger.State(rootHash), keys2, values2)
			require.NoError(t, err)

			trieUpdate, err := pathfinder.UpdateToTrieUpdate(update, pathFinderVersion)
			require.NoError(t, err)

			_, _, err = wal4.RecordUpdate(trieUpdate)
			require.NoError(t, err)

			rootHash, err = f.Update(trieUpdate)
			require.NoError(t, err)

			<-wal4.Done()

			require.FileExists(t, path.Join(dir, "00000011")) // make sure we have the extra segment
		})

		f5, err := mtrie.NewForest(size*10, metricsCollector, nil)
		require.NoError(t, err)

		t.Run("replay both checkpoint and updates after checkpoint", func(t *testing.T) {
			wal5, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
			require.NoError(t, err)

			updatesLeft := 1 // there should be only one update

			err = wal5.Replay(
				func(tries []*trie.MTrie) error {
					return f5.AddTries(tries)
				},
				func(update *ledger.TrieUpdate) error {
					if updatesLeft == 0 {
						return fmt.Errorf("more updates called than expected")
					}
					_, err := f5.Update(update)
					updatesLeft--
					return err
				},
				func(rootHash ledger.RootHash) error {
					return fmt.Errorf("unexpected deletion: no deletions were recorded")
				},
			)
			require.NoError(t, err)

			<-wal5.Done()
		})

		t.Run("extra updates were applied correctly", func(t *testing.T) {

			query, err := ledger.NewQuery(ledger.State(rootHash), keys2)
			require.NoError(t, err)
			trieRead, err := pathfinder.QueryToTrieRead(query, pathFinderVersion)
			require.NoError(t, err)

			values, err := f.Read(trieRead)
			require.NoError(t, err)

			values5, err := f5.Read(trieRead)
			require.NoError(t, err)

			for i := range keys2 {
				require.Equal(t, values2[i], values[i])
				require.Equal(t, values2[i], values5[i])
			}
		})

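		// The next subtest creates additional checkpoints at segments 4, 6 and 8,
		// corrupts the checkpoints for 6, 8 and 10, and expects loading to fall back
		// to the newest valid checkpoint (4) while replay still reconstructs the
		// latest state.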
		t.Run("corrupted checkpoints are skipped", func(t *testing.T) {

			f6, err := mtrie.NewForest(size*10, metricsCollector, nil)
			require.NoError(t, err)

			wal6, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
			require.NoError(t, err)

			// make sure no earlier checkpoints exist
			require.NoFileExists(t, path.Join(dir, "checkpoint.00000008"))
			require.NoFileExists(t, path.Join(dir, "checkpoint.00000006"))
			require.NoFileExists(t, path.Join(dir, "checkpoint.00000004"))

			require.FileExists(t, path.Join(dir, "checkpoint.00000010"))

			// create the missing checkpoints
			checkpointer, err := wal6.NewCheckpointer()
			require.NoError(t, err)

			err = checkpointer.Checkpoint(4)
			require.NoError(t, err)
			require.FileExists(t, path.Join(dir, "checkpoint.00000004"))

			err = checkpointer.Checkpoint(6)
			require.NoError(t, err)
			require.FileExists(t, path.Join(dir, "checkpoint.00000006"))

			err = checkpointer.Checkpoint(8)
			require.NoError(t, err)
			require.FileExists(t, path.Join(dir, "checkpoint.00000008"))

			// corrupt checkpoints 6, 8 and 10
			randomlyModifyFile(t, path.Join(dir, "checkpoint.00000006"))
			randomlyModifyFile(t, path.Join(dir, "checkpoint.00000008"))
			randomlyModifyFile(t, path.Join(dir, "checkpoint.00000010"))

			// make sure 10 is the latest checkpoint
			latestCheckpoint, err := checkpointer.LatestCheckpoint()
			require.NoError(t, err)
			require.Equal(t, 10, latestCheckpoint)
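			// LatestCheckpoint is based on the checkpoint file names on disk, so the
			// corrupted checkpoint.00000010 is still reported as the latest one.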

			// at this stage, number 4 should be the latest valid checkpoint;
			// check that the others fail to load

			_, err = checkpointer.LoadCheckpoint(10)
			require.Error(t, err)
			_, err = checkpointer.LoadCheckpoint(8)
			require.Error(t, err)
			_, err = checkpointer.LoadCheckpoint(6)
			require.Error(t, err)
			_, err = checkpointer.LoadCheckpoint(4)
			require.NoError(t, err)

			err = wal6.ReplayOnForest(f6)
			require.NoError(t, err)

			<-wal6.Done()

			// check if the latest data is still there
			query, err := ledger.NewQuery(ledger.State(rootHash), keys2)
			require.NoError(t, err)
			trieRead, err := pathfinder.QueryToTrieRead(query, pathFinderVersion)
			require.NoError(t, err)

			values, err := f.Read(trieRead)
			require.NoError(t, err)

			values6, err := f6.Read(trieRead)
			require.NoError(t, err)

			for i := range keys2 {
				require.Equal(t, values2[i], values[i])
				require.Equal(t, values2[i], values6[i])
			}

		})

	})
}

// func TestCheckpointFileError(t *testing.T) {
//
// 	unittest.RunWithTempDir(t, func(dir string) {
//
// 		wal, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
// 		require.NoError(t, err)
//
// 		// create WAL
//
// 		keys := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, 1600, 1600)
// 		values := testutils.RandomValues(numInsPerStep, valueMaxByteSize/2, valueMaxByteSize)
// 		update, err := ledger.NewUpdate(ledger.State(trie.EmptyTrieRootHash()), keys, values)
// 		require.NoError(t, err)
//
// 		trieUpdate, err := pathfinder.UpdateToTrieUpdate(update, pathFinderVersion)
// 		require.NoError(t, err)
//
// 		_, _, err = wal.RecordUpdate(trieUpdate)
// 		require.NoError(t, err)
//
// 		// some buffer time of the checkpointer to run
// 		time.Sleep(1 * time.Second)
// 		<-wal.Done()
//
// 		require.FileExists(t, path.Join(dir, "00000001")) //make sure WAL segment is saved
//
// 		wal2, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize)
// 		require.NoError(t, err)
//
// 		checkpointer, err := wal2.NewCheckpointer()
// 		require.NoError(t, err)
//
// 		t.Run("write error", func(t *testing.T) {
// 			errWrite := errors.New("unexpected write error")
//
// 			err = checkpointer.Checkpoint(1, func() (io.WriteCloser, error) {
// 				return newWriteCloserWithErrors(errWrite, nil), nil
// 			})
// 			require.ErrorIs(t, err, errWrite)
// 		})
//
// 		t.Run("close error", func(t *testing.T) {
// 			errClose := errors.New("unexpected close error")
//
// 			err = checkpointer.Checkpoint(1, func() (io.WriteCloser, error) {
// 				return newWriteCloserWithErrors(nil, errClose), nil
// 			})
// 			require.ErrorIs(t, err, errClose)
// 		})
// 	})
// }

// randomlyModifyFile picks a random byte in the file and modifies it.
// This should be enough to cause checkpoint loading to fail, since the
// checkpoint file contains a checksum of its content.
func randomlyModifyFile(t *testing.T, filename string) {

	file, err := os.OpenFile(filename, os.O_RDWR, 0644)
	require.NoError(t, err)
	defer file.Close()

	fileInfo, err := file.Stat()
	require.NoError(t, err)

	fileSize := fileInfo.Size()

	buf := make([]byte, 1)

	// pick a random offset that stays within the file
	offset := rand.Int63n(fileSize)

	_, err = file.ReadAt(buf, offset)
	require.NoError(t, err)

	// byte addition will simply wrap around
	buf[0]++

	_, err = file.WriteAt(buf, offset)
	require.NoError(t, err)
}

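// Test_StoringLoadingCheckpoints stores a small trie as a checkpoint file,
// verifies it can be loaded back unchanged, and then flips a single byte of an
// encoded hash to confirm that the checksum check detects the corruption.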
func Test_StoringLoadingCheckpoints(t *testing.T) {

	unittest.RunWithTempDir(t, func(dir string) {
		// A node hash is encoded verbatim in the checkpoint file, so we can locate
		// it and flip a byte: this guarantees a different checksum without breaking
		// the file structure (modifying a saved data length, for example, would
		// cause an EOF instead).

		emptyTrie := trie.NewEmptyMTrie()

		p1 := testutils.PathByUint8(0)
		v1 := testutils.LightPayload8('A', 'a')

		p2 := testutils.PathByUint8(1)
		v2 := testutils.LightPayload8('B', 'b')

		paths := []ledger.Path{p1, p2}
		payloads := []ledger.Payload{*v1, *v2}

		updatedTrie, _, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
		require.NoError(t, err)

		someHash := updatedTrie.RootNode().LeftChild().Hash() // hash of the root's left child

		fullpath := path.Join(dir, "temp-checkpoint")

		err = realWAL.StoreCheckpointV5(dir, "temp-checkpoint", logger, updatedTrie)
		require.NoError(t, err)

		t.Run("works without data modification", func(t *testing.T) {
			logger := unittest.Logger()
			tries, err := realWAL.LoadCheckpoint(fullpath, logger)
			require.NoError(t, err)
			require.Equal(t, 1, len(tries))
			require.Equal(t, updatedTrie, tries[0])
		})

		t.Run("detects modified data", func(t *testing.T) {
			b, err := os.ReadFile(fullpath)
			require.NoError(t, err)

			index := bytes.Index(b, someHash[:])
			require.NotEqual(t, -1, index)
			b[index] = 23

			err = os.WriteFile(fullpath, b, 0644)
			require.NoError(t, err)

			logger := unittest.Logger()
			tries, err := realWAL.LoadCheckpoint(fullpath, logger)
			require.Error(t, err)
			require.Nil(t, tries)
			require.Contains(t, err.Error(), "checksum")
		})
	})
}

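// writeCloserWithErrors is a test double for io.WriteCloser that returns the
// configured errors from Write and Close. It is used by the commented-out
// TestCheckpointFileError above to simulate checkpoint write/close failures.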
type writeCloserWithErrors struct {
	writeError error
	closeError error
}

func newWriteCloserWithErrors(writeError error, closeError error) *writeCloserWithErrors {
	return &writeCloserWithErrors{
		writeError: writeError,
		closeError: closeError,
	}
}

func (wc *writeCloserWithErrors) Write(p []byte) (n int, err error) {
	return 0, wc.writeError
}

func (wc *writeCloserWithErrors) Close() error {
	return wc.closeError
}