//go:build integration
// +build integration

// Copyright (c) 2021 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"testing"
	"time"

	"github.com/m3db/m3/src/dbnode/integration/generate"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/retention"
	"github.com/m3db/m3/src/dbnode/storage/repair"
	xtest "github.com/m3db/m3/src/x/test"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/stretchr/testify/require"
	"github.com/uber-go/tally"
)

// TestRepairForceAndOnlyCompare verifies that when repairs are force-enabled
// with the OnlyCompareRepair repair type, each node in a 3-node cluster runs
// a comparison-only repair pass (observable via the "repair.run" counter
// tagged repair_type=only_compare) and, critically, that no node's on-disk
// data is mutated by the repair — each node's data after the repair must
// match exactly what was written to it before the servers started.
//
// Note the namespace is created with SetRepairEnabled(false); the repairs
// only run because ForceRepairs is set on each setup, which is the
// "force" half of what this test exercises.
func TestRepairForceAndOnlyCompare(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}

	// Test both disjoint and shared series repair.
	genRepairData := genRepairDatafn(func(now xtime.UnixNano, blockSize time.Duration) (
		node0Data generate.SeriesBlocksByStart,
		node1Data generate.SeriesBlocksByStart,
		node2Data generate.SeriesBlocksByStart,
		allData generate.SeriesBlocksByStart,
	) {
		currBlockStart := now.Truncate(blockSize)
		// Node 0: "foo" is disjoint in the -4 block (node 1 has "bar" there),
		// while "foo"/"baz" in the -3 block are shared with node 1.
		node0Data = generate.BlocksByStart([]generate.BlockConfig{
			{IDs: []string{"foo"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
		})
		node1Data = generate.BlocksByStart([]generate.BlockConfig{
			{IDs: []string{"bar"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
		})
		// node2Data is deliberately left as its nil zero value: node 2 starts
		// with no data on disk. Ranging over the nil map below is a no-op.

		// allData is the union of all nodes' series, keyed by block start.
		allData = make(map[xtime.UnixNano]generate.SeriesBlock)
		for start, data := range node0Data {
			allData[start] = append(allData[start], data...)
		}
		for start, data := range node1Data {
			allData[start] = append(allData[start], data...)
		}
		for start, data := range node2Data {
			allData[start] = append(allData[start], data...)
		}

		return node0Data, node1Data, node2Data, allData
	})

	// Test setups.
	log := xtest.NewLogger(t)
	retentionOpts := retention.NewOptions().
		SetRetentionPeriod(20 * time.Hour).
		SetBlockSize(2 * time.Hour).
		SetBufferPast(10 * time.Minute).
		SetBufferFuture(2 * time.Minute)
	nsOpts := namespace.NewOptions().
		// Test needing to force enable repairs.
		SetRepairEnabled(false).
		SetRetentionOptions(retentionOpts)
	namesp, err := namespace.NewMetadata(testNamespaces[0], nsOpts)
	require.NoError(t, err)
	opts := NewTestOptions(t).
		SetNamespaces([]namespace.Metadata{namesp}).
		// Use TChannel clients for writing / reading because we want to target individual nodes at a time
		// and not write/read all nodes in the cluster.
		SetUseTChannelClientForWriting(true).
		SetUseTChannelClientForReading(true)

	// All three nodes get identical repair configuration: peers bootstrapper
	// disabled (each node only has its own on-disk data), repairs forced on,
	// and the compare-only repair type under test.
	setupOpts := []BootstrappableTestSetupOptions{
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Test forcing repair of type compare only repair.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Test forcing repair of type compare only repair.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Test forcing repair of type compare only repair.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
	}

	// nolint: govet
	setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts)
	defer closeFn()

	// Ensure that the current time is set such that the previous block is flushable.
	blockSize := retentionOpts.BlockSize()
	now := setups[0].NowFn()().Truncate(blockSize).Add(retentionOpts.BufferPast()).Add(time.Second)
	for _, setup := range setups {
		setup.SetNowFn(now)
	}

	// Write each node's generated data directly to its local filesystem so
	// the filesystem bootstrapper picks it up on server start.
	node0Data, node1Data, node2Data, _ := genRepairData(now, blockSize)
	if node0Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[0], node0Data, 0))
	}
	if node1Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[1], node1Data, 0))
	}
	if node2Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[2], node2Data, 0))
	}

	// Start the servers with filesystem bootstrappers.
	setups.parallel(func(s TestSetup) {
		if err := s.StartServer(); err != nil {
			panic(err)
		}
	})
	log.Debug("servers are now up")

	// Stop the servers.
	defer func() {
		setups.parallel(func(s TestSetup) {
			require.NoError(t, s.StopServer())
		})
		log.Debug("servers are now down")
	}()

	// Wait for repairs to occur at least once per node.
	log.Debug("waiting for repairs to run")
	var runSuccessPerNodeCounters []tally.CounterSnapshot
	require.True(t, waitUntil(func() bool {
		// Collect, per node, the first "repair.run" counter tagged with
		// repair_type=only_compare that has a positive value.
		var successCounters []tally.CounterSnapshot
		for _, setup := range setups {
			scope := setup.Scope()
			for _, v := range scope.Snapshot().Counters() {
				if v.Name() != "repair.run" {
					continue
				}
				repairType, ok := v.Tags()["repair_type"]
				if !ok || repairType != "only_compare" {
					continue
				}
				if v.Value() > 0 {
					successCounters = append(successCounters, v)
					// At most one matching counter counted per node.
					break
				}
			}
		}

		// Check if all counters are success.
		successAll := len(successCounters) == len(setups)
		if successAll {
			runSuccessPerNodeCounters = successCounters
			return true
		}
		return false
	}, 60*time.Second))

	// Verify that the repair runs only ran comparisons without repairing data.
	log.Debug("verifying repairs that ran")
	require.Equal(t, len(setups), len(runSuccessPerNodeCounters),
		"unexpected number of successful nodes ran repairs")
	for _, counter := range runSuccessPerNodeCounters {
		repairType, ok := counter.Tags()["repair_type"]
		require.True(t, ok)
		require.Equal(t, "only_compare", repairType)
		require.True(t, counter.Value() > 0)
	}

	// Verify data did not change (repair type is compare only).
	// node2Data is nil here; verifySeriesMaps against nil asserts node 2
	// still holds no series, i.e. the compare-only repair pulled nothing in.
	verifySeriesMaps(t, setups[0], namesp.ID(), node0Data)
	verifySeriesMaps(t, setups[1], namesp.ID(), node1Data)
	verifySeriesMaps(t, setups[2], namesp.ID(), node2Data)
}