// +build integration

// Copyright (c) 2021 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"github.com/uber-go/tally"

	"github.com/m3db/m3/src/dbnode/integration/generate"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/retention"
	"github.com/m3db/m3/src/dbnode/storage/repair"
	xtest "github.com/m3db/m3/src/x/test"
	xtime "github.com/m3db/m3/src/x/time"
)

// TestRepairForceAndOnlyCompare verifies that repairs can be force-enabled on a
// namespace that has repairs disabled (SetRepairEnabled(false) + ForceRepairs)
// and that the OnlyCompareRepair repair type only compares data between nodes
// without modifying it: after repair runs on every node, each node's on-disk
// data must be exactly what was originally written to it.
func TestRepairForceAndOnlyCompare(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}

	// Test both disjoint and shared series repair.
	//
	// Nodes 0 and 1 share series "foo"/"baz" in the -3*blockSize block but hold
	// disjoint series ("foo" vs "bar") in the -4*blockSize block. node2Data is
	// deliberately left nil, so node 2 starts with no data on disk at all —
	// giving the repair comparisons real differences to observe on every node.
	genRepairData := genRepairDatafn(func(now xtime.UnixNano, blockSize time.Duration) (
		node0Data generate.SeriesBlocksByStart,
		node1Data generate.SeriesBlocksByStart,
		node2Data generate.SeriesBlocksByStart,
		allData generate.SeriesBlocksByStart,
	) {
		currBlockStart := now.Truncate(blockSize)
		node0Data = generate.BlocksByStart([]generate.BlockConfig{
			{IDs: []string{"foo"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
		})
		node1Data = generate.BlocksByStart([]generate.BlockConfig{
			{IDs: []string{"bar"}, NumPoints: 90, Start: currBlockStart.Add(-4 * blockSize)},
			{IDs: []string{"foo", "baz"}, NumPoints: 90, Start: currBlockStart.Add(-3 * blockSize)},
		})

		// allData is the union of all three nodes' data, keyed by block start.
		// It is part of the genRepairDatafn contract but unused by this test
		// (discarded with "_" at the call site below).
		allData = make(map[xtime.UnixNano]generate.SeriesBlock)
		for start, data := range node0Data {
			allData[start] = append(allData[start], data...)
		}
		for start, data := range node1Data {
			allData[start] = append(allData[start], data...)
		}
		// node2Data is nil here; ranging over a nil map is a no-op.
		for start, data := range node2Data {
			allData[start] = append(allData[start], data...)
		}

		return node0Data, node1Data, node2Data, allData
	})

	// Test setups.
	log := xtest.NewLogger(t)
	retentionOpts := retention.NewOptions().
		SetRetentionPeriod(20 * time.Hour).
		SetBlockSize(2 * time.Hour).
		SetBufferPast(10 * time.Minute).
		SetBufferFuture(2 * time.Minute)
	nsOpts := namespace.NewOptions().
		// Test needing to force enable repairs.
		SetRepairEnabled(false).
		SetRetentionOptions(retentionOpts)
	namesp, err := namespace.NewMetadata(testNamespaces[0], nsOpts)
	require.NoError(t, err)
	opts := NewTestOptions(t).
		SetNamespaces([]namespace.Metadata{namesp}).
		// Use TChannel clients for writing / reading because we want to target individual nodes at a time
		// and not write/read all nodes in the cluster.
		SetUseTChannelClientForWriting(true).
		SetUseTChannelClientForReading(true)

	// All three nodes force-enable repairs in compare-only mode with the peers
	// bootstrapper disabled, so each node only ever sees its own written data.
	setupOpts := []BootstrappableTestSetupOptions{
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Test forcing repair of type compare only repair.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Test forcing repair of type compare only repair.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
		{
			DisablePeersBootstrapper: true,
			EnableRepairs:            true,
			// Test forcing repair of type compare only repair.
			ForceRepairs: true,
			RepairType:   repair.OnlyCompareRepair,
		},
	}

	// nolint: govet
	setups, closeFn := NewDefaultBootstrappableTestSetups(t, opts, setupOpts)
	defer closeFn()

	// Ensure that the current time is set such that the previous block is flushable.
	// Note: the clock must be pinned on every setup BEFORE any data is written
	// to disk so all nodes agree on which blocks are in the flushable past.
	blockSize := retentionOpts.BlockSize()
	now := setups[0].NowFn()().Truncate(blockSize).Add(retentionOpts.BufferPast()).Add(time.Second)
	for _, setup := range setups {
		setup.SetNowFn(now)
	}

	// Seed each node's disk with its own view of the data. node2Data is nil
	// (see genRepairData above), so node 2's write is skipped.
	node0Data, node1Data, node2Data, _ := genRepairData(now, blockSize)
	if node0Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[0], node0Data, 0))
	}
	if node1Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[1], node1Data, 0))
	}
	if node2Data != nil {
		require.NoError(t, writeTestDataToDisk(namesp, setups[2], node2Data, 0))
	}

	// Start the servers with filesystem bootstrappers.
	setups.parallel(func(s TestSetup) {
		if err := s.StartServer(); err != nil {
			panic(err)
		}
	})
	log.Debug("servers are now up")

	// Stop the servers.
	defer func() {
		setups.parallel(func(s TestSetup) {
			require.NoError(t, s.StopServer())
		})
		log.Debug("servers are now down")
	}()

	// Wait for repairs to occur at least once per node.
	// Poll each node's metrics scope for a "repair.run" counter tagged with
	// repair_type=only_compare whose value is positive; succeed only once every
	// node has reported at least one such run (or fail after 60s).
	log.Debug("waiting for repairs to run")
	var runSuccessPerNodeCounters []tally.CounterSnapshot
	require.True(t, waitUntil(func() bool {
		var successCounters []tally.CounterSnapshot
		for _, setup := range setups {
			scope := setup.Scope()
			for _, v := range scope.Snapshot().Counters() {
				if v.Name() != "repair.run" {
					continue
				}
				repairType, ok := v.Tags()["repair_type"]
				if !ok || repairType != "only_compare" {
					continue
				}
				if v.Value() > 0 {
					// One qualifying counter per node is enough.
					successCounters = append(successCounters, v)
					break
				}
			}
		}

		// Check if all counters are success.
		successAll := len(successCounters) == len(setups)
		if successAll {
			runSuccessPerNodeCounters = successCounters
			return true
		}
		return false
	}, 60*time.Second))

	// Verify that the repair runs only ran comparisons without repairing data.
	log.Debug("verifying repairs that ran")
	require.Equal(t, len(setups), len(runSuccessPerNodeCounters),
		"unexpected number of successful nodes ran repairs")
	for _, counter := range runSuccessPerNodeCounters {
		repairType, ok := counter.Tags()["repair_type"]
		require.True(t, ok)
		require.Equal(t, "only_compare", repairType)
		require.True(t, counter.Value() > 0)
	}

	// Verify data did not change (repair type is compare only).
	// Each node must still hold exactly what was written to it originally —
	// including node 2, which must remain empty (node2Data is nil).
	verifySeriesMaps(t, setups[0], namesp.ID(), node0Data)
	verifySeriesMaps(t, setups[1], namesp.ID(), node1Data)
	verifySeriesMaps(t, setups[2], namesp.ID(), node2Data)
}