github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/repair_test.go (about)

     1  // +build integration
     2  
     3  // Copyright (c) 2019 Uber Technologies, Inc.
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be included in
    13  // all copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  // THE SOFTWARE.
    22  
    23  package integration
    24  
    25  import (
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/m3db/m3/src/dbnode/integration/generate"
    30  	"github.com/m3db/m3/src/dbnode/namespace"
    31  	"github.com/m3db/m3/src/dbnode/retention"
    32  	"github.com/m3db/m3/src/x/ident"
    33  	xtest "github.com/m3db/m3/src/x/test"
    34  	xtime "github.com/m3db/m3/src/x/time"
    35  
    36  	"github.com/stretchr/testify/require"
    37  )
    38  
    39  func TestRepairDisjointSeries(t *testing.T) {
    40  	genRepairData := func(now xtime.UnixNano, blockSize time.Duration) (
    41  		node0Data generate.SeriesBlocksByStart,
    42  		node1Data generate.SeriesBlocksByStart,
    43  		node2Data generate.SeriesBlocksByStart,
    44  		allData generate.SeriesBlocksByStart,
    45  	) {
    46  		currBlockStart := now.Truncate(blockSize)
    47  		node0Data = generate.BlocksByStart([]generate.BlockConfig{
    48  			{IDs: []string{"foo"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
    49  		})
    50  		node1Data = generate.BlocksByStart([]generate.BlockConfig{
    51  			{IDs: []string{"bar"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
    52  		})
    53  
    54  		allData = make(map[xtime.UnixNano]generate.SeriesBlock)
    55  		for start, data := range node0Data {
    56  			for _, series := range data {
    57  				allData[start] = append(allData[start], series)
    58  			}
    59  		}
    60  		for start, data := range node1Data {
    61  			for _, series := range data {
    62  				allData[start] = append(allData[start], series)
    63  			}
    64  		}
    65  		for start, data := range node2Data {
    66  			for _, series := range data {
    67  				allData[start] = append(allData[start], series)
    68  			}
    69  		}
    70  
    71  		return node0Data, node1Data, node2Data, allData
    72  	}
    73  
    74  	testRepair(t, genRepairData, testRepairOptions{})
    75  }
    76  
    77  func TestRepairMergeSeries(t *testing.T) {
    78  	genRepairData := func(now xtime.UnixNano, blockSize time.Duration) (
    79  		node0Data generate.SeriesBlocksByStart,
    80  		node1Data generate.SeriesBlocksByStart,
    81  		node2Data generate.SeriesBlocksByStart,
    82  		allData generate.SeriesBlocksByStart,
    83  	) {
    84  		currBlockStart := now.Truncate(blockSize)
    85  		allData = generate.BlocksByStart([]generate.BlockConfig{
    86  			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
    87  			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
    88  			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-2 * blockSize)},
    89  		})
    90  		node0Data = make(map[xtime.UnixNano]generate.SeriesBlock)
    91  		node1Data = make(map[xtime.UnixNano]generate.SeriesBlock)
    92  
    93  		remainder := 0
    94  		appendSeries := func(target map[xtime.UnixNano]generate.SeriesBlock, start time.Time, s generate.Series) {
    95  			var dataWithMissing []generate.TestValue
    96  			for i := range s.Data {
    97  				if i%2 != remainder {
    98  					continue
    99  				}
   100  				dataWithMissing = append(dataWithMissing, s.Data[i])
   101  			}
   102  			target[xtime.ToUnixNano(start)] = append(
   103  				target[xtime.ToUnixNano(start)],
   104  				generate.Series{ID: s.ID, Data: dataWithMissing},
   105  			)
   106  			remainder = 1 - remainder
   107  		}
   108  		for start, data := range allData {
   109  			for _, series := range data {
   110  				appendSeries(node0Data, start.ToTime(), series)
   111  				appendSeries(node1Data, start.ToTime(), series)
   112  			}
   113  		}
   114  
   115  		return node0Data, node1Data, node2Data, allData
   116  	}
   117  
   118  	testRepair(t, genRepairData, testRepairOptions{})
   119  }
   120  
   121  func TestRepairDoesNotRepairCurrentBlock(t *testing.T) {
   122  	genRepairData := func(now xtime.UnixNano, blockSize time.Duration) (
   123  		node0Data generate.SeriesBlocksByStart,
   124  		node1Data generate.SeriesBlocksByStart,
   125  		node2Data generate.SeriesBlocksByStart,
   126  		allData generate.SeriesBlocksByStart,
   127  	) {
   128  		currBlockStart := now.Truncate(blockSize)
   129  		node0Data = generate.BlocksByStart([]generate.BlockConfig{
   130  			// Write in previous block should be repaired.
   131  			{IDs: []string{"prevBlock1", "prevBlock2"}, NumPoints: 1, Start: currBlockStart.Add(-blockSize)},
   132  			// Write in current block, should not be repaired.
   133  			{IDs: []string{"currBlock1", "currBlock2"}, NumPoints: 1, Start: currBlockStart},
   134  		})
   135  
   136  		allData = make(map[xtime.UnixNano]generate.SeriesBlock)
   137  		for start, data := range node0Data {
   138  			if !start.Equal(currBlockStart) {
   139  				allData[start] = data
   140  			}
   141  		}
   142  		require.Equal(t, 1, len(allData))
   143  
   144  		return node0Data, node1Data, node2Data, allData
   145  	}
   146  
   147  	currBlockSeries := []ident.ID{ident.StringID("currBlock1"), ident.StringID("currBlock2")}
   148  	testRepairOpts := testRepairOptions{
   149  		node1ShouldNotContainSeries: currBlockSeries,
   150  		node2ShouldNotContainSeries: currBlockSeries,
   151  	}
   152  	testRepair(t, genRepairData, testRepairOpts)
   153  }
   154  
   155  type genRepairDatafn func(
   156  	now xtime.UnixNano,
   157  	blockSize time.Duration,
   158  ) (
   159  	node0Data generate.SeriesBlocksByStart,
   160  	node1Data generate.SeriesBlocksByStart,
   161  	node2Data generate.SeriesBlocksByStart,
   162  	allData generate.SeriesBlocksByStart)
   163  
   164  type testRepairOptions struct {
   165  	node0ShouldNotContainSeries []ident.ID
   166  	node1ShouldNotContainSeries []ident.ID
   167  	node2ShouldNotContainSeries []ident.ID
   168  }
   169  
   170  func testRepair(
   171  	t *testing.T,
   172  	genRepairData genRepairDatafn,
   173  	testRepairOpts testRepairOptions,
   174  ) {
   175  	if testing.Short() {
   176  		t.SkipNow()
   177  	}
   178  
   179  	// Test setups.
   180  	log := xtest.NewLogger(t)
   181  	retentionOpts := retention.NewOptions().
   182  		SetRetentionPeriod(20 * time.Hour).
   183  		SetBlockSize(2 * time.Hour).
   184  		SetBufferPast(10 * time.Minute).
   185  		SetBufferFuture(2 * time.Minute)
   186  	nsOpts := namespace.NewOptions().
   187  		SetRepairEnabled(true).
   188  		// Explicitly ensure that the repair feature works even if cold writes is disabled
   189  		// at the namespace level.
   190  		SetColdWritesEnabled(false).
   191  		SetRetentionOptions(retentionOpts)
   192  	namesp, err := namespace.NewMetadata(testNamespaces[0], nsOpts)
   193  	require.NoError(t, err)
   194  	opts := NewTestOptions(t).
   195  		SetNamespaces([]namespace.Metadata{namesp}).
   196  		// Use TChannel clients for writing / reading because we want to target individual nodes at a time
   197  		// and not write/read all nodes in the cluster.
   198  		SetUseTChannelClientForWriting(true).
   199  		SetUseTChannelClientForReading(true)
   200  
   201  	setupOpts := []BootstrappableTestSetupOptions{
   202  		{
   203  			DisablePeersBootstrapper: true,
   204  			EnableRepairs:            true,
   205  		},
   206  		{
   207  			DisablePeersBootstrapper: true,
   208  			EnableRepairs:            true,
   209  		},
   210  		{
   211  			DisablePeersBootstrapper: true,
   212  			EnableRepairs:            true,
   213  		},
   214  	}
   215  	setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts)
   216  	defer closeFn()
   217  
   218  	// Ensure that the current time is set such that the previous block is flushable.
   219  	blockSize := retentionOpts.BlockSize()
   220  	now := setups[0].NowFn()().Truncate(blockSize).Add(retentionOpts.BufferPast()).Add(time.Second)
   221  	for _, setup := range setups {
   222  		setup.SetNowFn(now)
   223  	}
   224  
   225  	node0Data, node1Data, node2Data, allData := genRepairData(now, blockSize)
   226  	if node0Data != nil {
   227  		require.NoError(t, writeTestDataToDisk(namesp, setups[0], node0Data, 0))
   228  	}
   229  	if node1Data != nil {
   230  		require.NoError(t, writeTestDataToDisk(namesp, setups[1], node1Data, 0))
   231  	}
   232  	if node2Data != nil {
   233  		require.NoError(t, writeTestDataToDisk(namesp, setups[2], node2Data, 0))
   234  	}
   235  
   236  	// Start the servers with filesystem bootstrappers.
   237  	setups.parallel(func(s TestSetup) {
   238  		if err := s.StartServer(); err != nil {
   239  			panic(err)
   240  		}
   241  	})
   242  	log.Debug("servers are now up")
   243  
   244  	// Stop the servers.
   245  	defer func() {
   246  		setups.parallel(func(s TestSetup) {
   247  			require.NoError(t, s.StopServer())
   248  		})
   249  		log.Debug("servers are now down")
   250  	}()
   251  
   252  	require.True(t, waitUntil(func() bool {
   253  		for _, setup := range setups {
   254  			if err := checkFlushedDataFiles(setup.ShardSet(), setup.StorageOpts(), namesp.ID(), allData); err != nil {
   255  				// Increment the time each time it fails to make sure background processes are able to proceed.
   256  				for _, s := range setups {
   257  					s.SetNowFn(s.NowFn()().Add(time.Millisecond))
   258  				}
   259  				return false
   260  			}
   261  		}
   262  		return true
   263  	}, 60*time.Second))
   264  
   265  	// Verify in-memory data matches what we expect.
   266  	verifySeriesMaps(t, setups[0], namesp.ID(), allData)
   267  	verifySeriesMaps(t, setups[1], namesp.ID(), allData)
   268  	verifySeriesMaps(t, setups[2], namesp.ID(), allData)
   269  
   270  	for _, seriesID := range testRepairOpts.node0ShouldNotContainSeries {
   271  		contains, err := containsSeries(setups[0], namesp.ID(), seriesID, now.Add(-retentionOpts.RetentionPeriod()), now)
   272  		require.NoError(t, err)
   273  		require.False(t, contains)
   274  	}
   275  	for _, seriesID := range testRepairOpts.node1ShouldNotContainSeries {
   276  		contains, err := containsSeries(setups[1], namesp.ID(), seriesID, now.Add(-retentionOpts.RetentionPeriod()), now)
   277  		require.NoError(t, err)
   278  		require.False(t, contains)
   279  	}
   280  	for _, seriesID := range testRepairOpts.node2ShouldNotContainSeries {
   281  		contains, err := containsSeries(setups[2], namesp.ID(), seriesID, now.Add(-retentionOpts.RetentionPeriod()), now)
   282  		require.NoError(t, err)
   283  		require.False(t, contains)
   284  	}
   285  }