github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/repair_force_only_compare_test.go (about)

//go:build integration
// +build integration

// Copyright (c) 2021 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"testing"
	"time"

	"github.com/m3db/m3/src/dbnode/integration/generate"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/retention"
	"github.com/m3db/m3/src/dbnode/storage/repair"
	xtest "github.com/m3db/m3/src/x/test"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/stretchr/testify/require"
	"github.com/uber-go/tally"
)

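// TestRepairForceAndOnlyCompare exercises forced, compare-only repairs across a
// three node cluster. As a rough note on running it locally (the repository's
// make targets may wrap this differently), the integration build tag must be
// set, e.g.:
//
//	go test -tags integration -run TestRepairForceAndOnlyCompare ./src/dbnode/integration/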
func TestRepairForceAndOnlyCompare(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}

	// Test both disjoint and shared series repair.
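	// Node 0 writes "foo" into the block four block sizes back and "foo"/"baz"
	// into the block three block sizes back; node 1 writes "bar" plus the same
	// shared "foo"/"baz" block; node 2 is given no data at all. That yields
	// disjoint series ("foo" vs "bar"), shared series ("foo", "baz"), and an
	// entirely empty peer for the compare-only repair to run against.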
	genRepairData := genRepairDatafn(func(now xtime.UnixNano, blockSize time.Duration) (
		node0Data generate.SeriesBlocksByStart,
		node1Data generate.SeriesBlocksByStart,
		node2Data generate.SeriesBlocksByStart,
		allData generate.SeriesBlocksByStart,
	) {
		currBlockStart := now.Truncate(blockSize)
		node0Data = generate.BlocksByStart([]generate.BlockConfig{
			{IDs: []string{"foo"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
		})
		node1Data = generate.BlocksByStart([]generate.BlockConfig{
			{IDs: []string{"bar"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
		})

		allData = make(map[xtime.UnixNano]generate.SeriesBlock)
		for start, data := range node0Data {
			allData[start] = append(allData[start], data...)
		}
		for start, data := range node1Data {
			allData[start] = append(allData[start], data...)
		}
		for start, data := range node2Data {
			allData[start] = append(allData[start], data...)
		}

		return node0Data, node1Data, node2Data, allData
	})

	// Test setups.
	log := xtest.NewLogger(t)
	retentionOpts := retention.NewOptions().
		SetRetentionPeriod(20 * time.Hour).
		SetBlockSize(2 * time.Hour).
		SetBufferPast(10 * time.Minute).
		SetBufferFuture(2 * time.Minute)
	nsOpts := namespace.NewOptions().
		// Repairs are left disabled on the namespace so that the test
		// exercises force-enabling them.
		SetRepairEnabled(false).
		SetRetentionOptions(retentionOpts)
	namesp, err := namespace.NewMetadata(testNamespaces[0], nsOpts)
	require.NoError(t, err)
	opts := NewTestOptions(t).
		SetNamespaces([]namespace.Metadata{namesp}).
		// Use TChannel clients for writing / reading because we want to target individual nodes at a time
		// and not write/read all nodes in the cluster.
		SetUseTChannelClientForWriting(true).
		SetUseTChannelClientForReading(true)

	setupOpts := []BootstrappableTestSetupOptions{
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Force repairs to run with the compare-only repair type.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Force repairs to run with the compare-only repair type.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Force repairs to run with the compare-only repair type.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
	}
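
	// ForceRepairs makes the repair runs happen even though the namespace above
	// was created with repairs disabled, and OnlyCompareRepair restricts those
	// runs to comparing blocks between peers (surfacing the repair.run counters
	// tagged repair_type=only_compare that are asserted on below) without
	// writing any repaired data back; the final verification relies on this.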

	// nolint: govet
	setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts)
	defer closeFn()

	// Ensure that the current time is set such that the previous block is flushable.
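	// With the retention options above (2h blocks, 10m buffer past), truncating
	// to the block start and adding bufferPast+1s pins "now" just past the
	// buffer-past window: e.g. if the raw clock reads 05:37, the block start is
	// 04:00 and the adjusted time is 04:10:01, so the previous (02:00-04:00)
	// block counts as flushable.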
	blockSize := retentionOpts.BlockSize()
	now := setups[0].NowFn()().Truncate(blockSize).Add(retentionOpts.BufferPast()).Add(time.Second)
	for _, setup := range setups {
		setup.SetNowFn(now)
	}

	node0Data, node1Data, node2Data, _ := genRepairData(now, blockSize)
	if node0Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[0], node0Data, 0))
	}
	if node1Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[1], node1Data, 0))
	}
	if node2Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[2], node2Data, 0))
	}

	// Start the servers with filesystem bootstrappers.
	setups.parallel(func(s TestSetup) {
		if err := s.StartServer(); err != nil {
			panic(err)
		}
	})
	log.Debug("servers are now up")

	// Stop the servers.
	defer func() {
		setups.parallel(func(s TestSetup) {
			require.NoError(t, s.StopServer())
		})
		log.Debug("servers are now down")
	}()

	// Wait for repairs to occur at least once per node.
	log.Debug("waiting for repairs to run")
	var runSuccessPerNodeCounters []tally.CounterSnapshot
	require.True(t, waitUntil(func() bool {
		var successCounters []tally.CounterSnapshot
		for _, setup := range setups {
			scope := setup.Scope()
			for _, v := range scope.Snapshot().Counters() {
				if v.Name() != "repair.run" {
					continue
				}
				repairType, ok := v.Tags()["repair_type"]
				if !ok || repairType != "only_compare" {
					continue
				}
				if v.Value() > 0 {
					successCounters = append(successCounters, v)
					break
				}
			}
		}

		// Check whether every node has reported a successful repair run.
		successAll := len(successCounters) == len(setups)
		if successAll {
			runSuccessPerNodeCounters = successCounters
			return true
		}
		return false
	}, 60*time.Second))

	// Verify that the repair runs only performed comparisons and did not repair any data.
	log.Debug("verifying repairs that ran")
	require.Equal(t, len(setups), len(runSuccessPerNodeCounters),
		"unexpected number of nodes that ran successful repairs")
	for _, counter := range runSuccessPerNodeCounters {
		repairType, ok := counter.Tags()["repair_type"]
		require.True(t, ok)
		require.Equal(t, "only_compare", repairType)
		require.True(t, counter.Value() > 0)
	}

	// Verify the data did not change (the repair type is compare only).
	verifySeriesMaps(t, setups[0], namesp.ID(), node0Data)
	verifySeriesMaps(t, setups[1], namesp.ID(), node1Data)
	verifySeriesMaps(t, setups[2], namesp.ID(), node2Data)
}