github.com/m3db/m3@v1.5.0/src/dbnode/integration/repair_force_only_compare_test.go (about)

     1  // +build integration
     2  
     3  // Copyright (c) 2021 Uber Technologies, Inc.
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be included in
    13  // all copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  // THE SOFTWARE.
    22  
    23  package integration
    24  
    25  import (
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/stretchr/testify/require"
    30  	"github.com/uber-go/tally"
    31  
    32  	"github.com/m3db/m3/src/dbnode/integration/generate"
    33  	"github.com/m3db/m3/src/dbnode/namespace"
    34  	"github.com/m3db/m3/src/dbnode/retention"
    35  	"github.com/m3db/m3/src/dbnode/storage/repair"
    36  	xtest "github.com/m3db/m3/src/x/test"
    37  	xtime "github.com/m3db/m3/src/x/time"
    38  )
    39  
    40  func TestRepairForceAndOnlyCompare(t *testing.T) {
    41  	if testing.Short() {
    42  		t.SkipNow()
    43  	}
    44  
    45  	// Test both disjoint and shared series repair.
    46  	genRepairData := genRepairDatafn(func(now xtime.UnixNano, blockSize time.Duration) (
    47  		node0Data generate.SeriesBlocksByStart,
    48  		node1Data generate.SeriesBlocksByStart,
    49  		node2Data generate.SeriesBlocksByStart,
    50  		allData generate.SeriesBlocksByStart,
    51  	) {
    52  		currBlockStart := now.Truncate(blockSize)
    53  		node0Data = generate.BlocksByStart([]generate.BlockConfig{
    54  			{IDs: []string{"foo"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
    55  			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
    56  		})
    57  		node1Data = generate.BlocksByStart([]generate.BlockConfig{
    58  			{IDs: []string{"bar"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
    59  			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
    60  		})
    61  
    62  		allData = make(map[xtime.UnixNano]generate.SeriesBlock)
    63  		for start, data := range node0Data {
    64  			allData[start] = append(allData[start], data...)
    65  		}
    66  		for start, data := range node1Data {
    67  			allData[start] = append(allData[start], data...)
    68  		}
    69  		for start, data := range node2Data {
    70  			allData[start] = append(allData[start], data...)
    71  		}
    72  
    73  		return node0Data, node1Data, node2Data, allData
    74  	})
    75  
    76  	// Test setups.
    77  	log := xtest.NewLogger(t)
    78  	retentionOpts := retention.NewOptions().
    79  		SetRetentionPeriod(20 * time.Hour).
    80  		SetBlockSize(2 * time.Hour).
    81  		SetBufferPast(10 * time.Minute).
    82  		SetBufferFuture(2 * time.Minute)
    83  	nsOpts := namespace.NewOptions().
    84  		// Test needing to force enable repairs.
    85  		SetRepairEnabled(false).
    86  		SetRetentionOptions(retentionOpts)
    87  	namesp, err := namespace.NewMetadata(testNamespaces[0], nsOpts)
    88  	require.NoError(t, err)
    89  	opts := NewTestOptions(t).
    90  		SetNamespaces([]namespace.Metadata{namesp}).
    91  		// Use TChannel clients for writing / reading because we want to target individual nodes at a time
    92  		// and not write/read all nodes in the cluster.
    93  		SetUseTChannelClientForWriting(true).
    94  		SetUseTChannelClientForReading(true)
    95  
    96  	setupOpts := []BootstrappableTestSetupOptions{
    97  		{
    98  			DisablePeersBootstrapper: true,
    99  			EnableRepairs:            true,
   100  			// Test forcing repair of type compare only repair.
   101  			ForceRepairs: true,
   102  			RepairType:   repair.OnlyCompareRepair,
   103  		},
   104  		{
   105  			DisablePeersBootstrapper: true,
   106  			EnableRepairs:            true,
   107  			// Test forcing repair of type compare only repair.
   108  			ForceRepairs: true,
   109  			RepairType:   repair.OnlyCompareRepair,
   110  		},
   111  		{
   112  			DisablePeersBootstrapper: true,
   113  			EnableRepairs:            true,
   114  			// Test forcing repair of type compare only repair.
   115  			ForceRepairs: true,
   116  			RepairType:   repair.OnlyCompareRepair,
   117  		},
   118  	}
   119  
   120  	// nolint: govet
   121  	setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts)
   122  	defer closeFn()
   123  
   124  	// Ensure that the current time is set such that the previous block is flushable.
   125  	blockSize := retentionOpts.BlockSize()
   126  	now := setups[0].NowFn()().Truncate(blockSize).Add(retentionOpts.BufferPast()).Add(time.Second)
   127  	for _, setup := range setups {
   128  		setup.SetNowFn(now)
   129  	}
   130  
   131  	node0Data, node1Data, node2Data, _ := genRepairData(now, blockSize)
   132  	if node0Data != nil {
   133  		require.NoError(t, writeTestDataToDisk(namesp, setups[0], node0Data, 0))
   134  	}
   135  	if node1Data != nil {
   136  		require.NoError(t, writeTestDataToDisk(namesp, setups[1], node1Data, 0))
   137  	}
   138  	if node2Data != nil {
   139  		require.NoError(t, writeTestDataToDisk(namesp, setups[2], node2Data, 0))
   140  	}
   141  
   142  	// Start the servers with filesystem bootstrappers.
   143  	setups.parallel(func(s TestSetup) {
   144  		if err := s.StartServer(); err != nil {
   145  			panic(err)
   146  		}
   147  	})
   148  	log.Debug("servers are now up")
   149  
   150  	// Stop the servers.
   151  	defer func() {
   152  		setups.parallel(func(s TestSetup) {
   153  			require.NoError(t, s.StopServer())
   154  		})
   155  		log.Debug("servers are now down")
   156  	}()
   157  
   158  	// Wait for repairs to occur at least once per node.
   159  	log.Debug("waiting for repairs to run")
   160  	var runSuccessPerNodeCounters []tally.CounterSnapshot
   161  	require.True(t, waitUntil(func() bool {
   162  		var successCounters []tally.CounterSnapshot
   163  		for _, setup := range setups {
   164  			scope := setup.Scope()
   165  			for _, v := range scope.Snapshot().Counters() {
   166  				if v.Name() != "repair.run" {
   167  					continue
   168  				}
   169  				repairType, ok := v.Tags()["repair_type"]
   170  				if !ok || repairType != "only_compare" {
   171  					continue
   172  				}
   173  				if v.Value() > 0 {
   174  					successCounters = append(successCounters, v)
   175  					break
   176  				}
   177  			}
   178  		}
   179  
   180  		// Check if all counters are success.
   181  		successAll := len(successCounters) == len(setups)
   182  		if successAll {
   183  			runSuccessPerNodeCounters = successCounters
   184  			return true
   185  		}
   186  		return false
   187  	}, 60*time.Second))
   188  
   189  	// Verify that the repair runs only ran comparisons without repairing data.
   190  	log.Debug("verifying repairs that ran")
   191  	require.Equal(t, len(setups), len(runSuccessPerNodeCounters),
   192  		"unexpected number of successful nodes ran repairs")
   193  	for _, counter := range runSuccessPerNodeCounters {
   194  		repairType, ok := counter.Tags()["repair_type"]
   195  		require.True(t, ok)
   196  		require.Equal(t, "only_compare", repairType)
   197  		require.True(t, counter.Value() > 0)
   198  	}
   199  
   200  	// Verify data did not change (repair type is compare only).
   201  	verifySeriesMaps(t, setups[0], namesp.ID(), node0Data)
   202  	verifySeriesMaps(t, setups[1], namesp.ID(), node1Data)
   203  	verifySeriesMaps(t, setups[2], namesp.ID(), node2Data)
   204  }