github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/bootstrap_retries_test.go (about) 1 //go:build integration 2 // +build integration 3 4 // Copyright (c) 2021 Uber Technologies, Inc. 5 // 6 // Permission is hereby granted, free of charge, to any person obtaining a copy 7 // of this software and associated documentation files (the "Software"), to deal 8 // in the Software without restriction, including without limitation the rights 9 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 // copies of the Software, and to permit persons to whom the Software is 11 // furnished to do so, subject to the following conditions: 12 // 13 // The above copyright notice and this permission notice shall be included in 14 // all copies or substantial portions of the Software. 15 // 16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 // THE SOFTWARE. 23 24 package integration 25 26 import ( 27 "errors" 28 "strings" 29 "testing" 30 "time" 31 32 "github.com/m3db/m3/src/cluster/shard" 33 "github.com/m3db/m3/src/dbnode/integration/generate" 34 "github.com/m3db/m3/src/dbnode/namespace" 35 "github.com/m3db/m3/src/dbnode/retention" 36 "github.com/m3db/m3/src/dbnode/storage" 37 "github.com/m3db/m3/src/dbnode/storage/bootstrap" 38 "github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper" 39 "github.com/m3db/m3/src/x/context" 40 "github.com/m3db/m3/src/x/ident" 41 42 "github.com/stretchr/testify/assert" 43 "github.com/stretchr/testify/require" 44 "github.com/uber-go/tally" 45 ) 46 47 func TestBootstrapRetriesDueToError(t *testing.T) { 48 // Setup the test bootstrapper to only proceed when a signal is sent. 49 signalCh := make(chan bool) 50 51 setup, testScope := bootstrapRetryTestSetup(t, func( 52 ctx context.Context, 53 namespaces bootstrap.Namespaces, 54 cache bootstrap.Cache, 55 ) (bootstrap.NamespaceResults, error) { 56 shouldError := <-signalCh 57 if shouldError { 58 return bootstrap.NamespaceResults{}, errors.New("error in bootstrapper") 59 } 60 // Mark all as fulfilled 61 bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide() 62 require.NoError(t, err) 63 return bs.Bootstrap(ctx, namespaces, cache) 64 }) 65 66 go func() { 67 // Wait for server to get started by the main test method. 68 require.NoError(t, setup.WaitUntilServerIsUp()) 69 70 // First bootstrap pass. Bootstrapper produces an error. Check if DB is not marked bootstrapped. 71 signalCh <- true 72 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 73 74 // Bootstrap retry. Bootstrapper completes persist range without errors. Check if DB isn't 75 // marked as bootstrapped on the second pass. 76 signalCh <- false 77 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 78 79 // Still bootstrap retry. Bootstrapper completes in-memory range without errors. DB finishes bootstrapping. 80 signalCh <- false 81 }() 82 83 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 84 defer func() { 85 require.NoError(t, setup.StopServer()) 86 }() 87 88 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 89 assertRetryMetric(t, testScope, "other") 90 } 91 92 func TestBootstrapRetriesDueToObsoleteRanges(t *testing.T) { 93 // Setup the test bootstrapper to only proceed when a signal is sent. 94 signalCh := make(chan struct{}) 95 96 setup, testScope := bootstrapRetryTestSetup(t, func( 97 ctx context.Context, 98 namespaces bootstrap.Namespaces, 99 cache bootstrap.Cache, 100 ) (bootstrap.NamespaceResults, error) { 101 // read from signalCh twice so we could advance the clock exactly in between of those signals 102 <-signalCh 103 <-signalCh 104 bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide() 105 require.NoError(t, err) 106 return bs.Bootstrap(ctx, namespaces, cache) 107 }) 108 109 go assertBootstrapRetry(t, setup, signalCh) 110 111 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 112 defer func() { 113 require.NoError(t, setup.StopServer()) 114 }() 115 116 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 117 assertRetryMetric(t, testScope, "obsolete-ranges") 118 } 119 120 func TestNoOpenFilesWhenBootstrapRetriesDueToObsoleteRanges(t *testing.T) { 121 // Setup the test bootstrapper to only proceed when a signal is sent. 122 signalCh := make(chan struct{}) 123 124 setup, testScope := bootstrapRetryTestSetup(t, func( 125 ctx context.Context, 126 namespaces bootstrap.Namespaces, 127 cache bootstrap.Cache, 128 ) (bootstrap.NamespaceResults, error) { 129 // read from signalCh twice so we could advance the clock exactly in between of those signals 130 <-signalCh 131 <-signalCh 132 bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide() 133 require.NoError(t, err) 134 return bs.Bootstrap(ctx, namespaces, cache) 135 }) 136 137 go assertBootstrapRetry(t, setup, signalCh) 138 139 // Write test data 140 now := setup.NowFn()() 141 142 fooSeries := generate.Series{ 143 ID: ident.StringID("foo"), 144 Tags: ident.NewTags(ident.StringTag("city", "new_york"), ident.StringTag("foo", "foo")), 145 } 146 147 barSeries := generate.Series{ 148 ID: ident.StringID("bar"), 149 Tags: ident.NewTags(ident.StringTag("city", "new_jersey")), 150 } 151 152 bazSeries := generate.Series{ 153 ID: ident.StringID("baz"), 154 Tags: ident.NewTags(ident.StringTag("city", "seattle")), 155 } 156 157 blockSize := 2 * time.Hour 158 159 ns1 := setup.Namespaces()[0] 160 seriesMaps := generate.BlocksByStart([]generate.BlockConfig{ 161 { 162 IDs: []string{fooSeries.ID.String()}, 163 Tags: fooSeries.Tags, 164 NumPoints: 100, 165 Start: now.Add(-1 * blockSize), 166 }, 167 { 168 IDs: []string{barSeries.ID.String()}, 169 Tags: barSeries.Tags, 170 NumPoints: 100, 171 Start: now.Add(-1 * blockSize), 172 }, 173 { 174 IDs: []string{fooSeries.ID.String()}, 175 Tags: fooSeries.Tags, 176 NumPoints: 100, 177 Start: now.Add(1 * blockSize), 178 }, 179 { 180 IDs: []string{barSeries.ID.String()}, 181 Tags: barSeries.Tags, 182 NumPoints: 100, 183 Start: now.Add(1 * blockSize), 184 }, 185 { 186 IDs: []string{fooSeries.ID.String()}, 187 Tags: fooSeries.Tags, 188 NumPoints: 50, 189 Start: now, 190 }, 191 { 192 IDs: []string{bazSeries.ID.String()}, 193 Tags: bazSeries.Tags, 194 NumPoints: 50, 195 Start: now, 196 }, 197 }) 198 199 require.NoError(t, writeTestDataToDiskWithIndex(ns1, setup, seriesMaps)) 200 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 201 defer func() { 202 require.NoError(t, setup.StopServerAndVerifyOpenFilesAreClosed()) 203 setup.Close() 204 }() 205 206 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 207 assertRetryMetric(t, testScope, "obsolete-ranges") 208 } 209 210 func TestBootstrapRetriesDueToUnfulfilledRanges(t *testing.T) { 211 // Setup the test bootstrapper to only proceed when a signal is sent. 212 signalCh := make(chan bool) 213 214 setup, testScope := bootstrapRetryTestSetup(t, func( 215 ctx context.Context, 216 namespaces bootstrap.Namespaces, 217 cache bootstrap.Cache, 218 ) (bootstrap.NamespaceResults, error) { 219 var provider bootstrap.BootstrapperProvider 220 shouldUnfulfill := <-signalCh 221 if shouldUnfulfill { 222 provider = bootstrapper.NewNoOpNoneBootstrapperProvider() 223 } else { 224 provider = bootstrapper.NewNoOpAllBootstrapperProvider() 225 } 226 bs, err := provider.Provide() 227 require.NoError(t, err) 228 return bs.Bootstrap(ctx, namespaces, cache) 229 }) 230 231 go func() { 232 // Wait for server to get started by the main test method. 233 require.NoError(t, setup.WaitUntilServerIsUp()) 234 235 // First bootstrap pass. Bootstrap produces unfulfilled ranges for persist range. 236 // Check if DB is not marked bootstrapped. 237 signalCh <- true 238 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 239 // Still first bootstrap pass. Bootstrap produces unfulfilled ranges for in-memory range. 240 // Check if DB is not marked bootstrapped. 241 signalCh <- true 242 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 243 244 // Bootstrap retry. Bootstrapper completes persist range fulfilling everything. 245 // Check if DB isn't marked as bootstrapped on the second pass. 246 signalCh <- false 247 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 248 249 // Still bootstrap retry. Bootstrapper completes in-memory range fulfilling everything. 250 // DB finishes bootstrapping. 251 signalCh <- false 252 }() 253 254 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 255 defer func() { 256 require.NoError(t, setup.StopServer()) 257 }() 258 259 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 260 261 assertRetryMetric(t, testScope, "other") 262 } 263 264 func assertBootstrapRetry(t *testing.T, setup TestSetup, signalCh chan struct{}) { 265 // Wait for server to get started by the main test method. 266 require.NoError(t, setup.WaitUntilServerIsUp()) 267 268 // First bootstrap pass, persist ranges. Check if DB is not marked bootstrapped and advance clock. 269 signalCh <- struct{}{} 270 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 271 setup.SetNowFn(setup.NowFn()().Add(2 * time.Hour)) 272 signalCh <- struct{}{} 273 274 // Still first bootstrap pass, in-memory ranges. Due to advanced clock previously calculated 275 // ranges are obsolete. Check if DB is not marked bootstrapped. 276 signalCh <- struct{}{} 277 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 278 signalCh <- struct{}{} 279 280 // Bootstrap retry, persist ranges. Check if DB isn't marked as bootstrapped on the second pass. 281 signalCh <- struct{}{} 282 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 283 signalCh <- struct{}{} 284 } 285 286 type bootstrapFn = func( 287 ctx context.Context, 288 namespaces bootstrap.Namespaces, 289 cache bootstrap.Cache, 290 ) (bootstrap.NamespaceResults, error) 291 292 func bootstrapRetryTestSetup(t *testing.T, bootstrapFn bootstrapFn) (TestSetup, tally.TestScope) { 293 testScope := tally.NewTestScope("testScope", map[string]string{}) 294 295 rOpts := retention.NewOptions(). 296 SetRetentionPeriod(12 * time.Hour). 297 SetBufferPast(5 * time.Minute). 298 SetBufferFuture(5 * time.Minute) 299 300 ns1, err := namespace.NewMetadata(testNamespaces[0], namespace.NewOptions().SetRetentionOptions(rOpts)) 301 require.NoError(t, err) 302 opts := NewTestOptions(t). 303 SetNamespaces([]namespace.Metadata{ns1}). 304 SetShardSetOptions(&TestShardSetOptions{ 305 // Set all shards to initializing so bootstrap is 306 // retried on an obsolete range (which is not done 307 // if all shards are available and hence coming from disk). 308 ShardState: shard.Initializing, 309 }) 310 311 setup, err := NewTestSetup(t, opts, nil, func(storageOpts storage.Options) storage.Options { 312 return storageOpts.SetInstrumentOptions(storageOpts.InstrumentOptions().SetMetricsScope(testScope)) 313 }) 314 require.NoError(t, err) 315 defer setup.Close() 316 317 var ( 318 fsOpts = setup.StorageOpts().CommitLogOptions().FilesystemOptions() 319 320 bootstrapOpts = newDefaulTestResultOptions(setup.StorageOpts()) 321 bootstrapperSourceOpts = testBootstrapperSourceOptions{read: bootstrapFn} 322 processOpts = bootstrap.NewProcessOptions(). 323 SetTopologyMapProvider(setup). 324 SetOrigin(setup.Origin()) 325 ) 326 bootstrapOpts.SetInstrumentOptions(bootstrapOpts.InstrumentOptions().SetMetricsScope(testScope)) 327 boostrapper := newTestBootstrapperSource(bootstrapperSourceOpts, bootstrapOpts, nil) 328 329 processProvider, err := bootstrap.NewProcessProvider( 330 boostrapper, processOpts, bootstrapOpts, fsOpts) 331 require.NoError(t, err) 332 setup.SetStorageOpts(setup.StorageOpts().SetBootstrapProcessProvider(processProvider)) 333 return setup, testScope 334 } 335 336 func assertRetryMetric(t *testing.T, testScope tally.TestScope, expectedReason string) { 337 const ( 338 metricName = "bootstrap-retries" 339 reasonTag = "reason" 340 ) 341 valuesByReason := make(map[string]int) 342 for _, counter := range testScope.Snapshot().Counters() { 343 if strings.Contains(counter.Name(), metricName) { 344 reason := "" 345 if r, ok := counter.Tags()[reasonTag]; ok { 346 reason = r 347 } 348 valuesByReason[reason] = int(counter.Value()) 349 } 350 } 351 352 val, ok := valuesByReason[expectedReason] 353 if assert.True(t, ok, "missing metric for expected reason") { 354 assert.Equal(t, 1, val) 355 } 356 for r, val := range valuesByReason { 357 if r != expectedReason { 358 assert.Equal(t, 0, val) 359 } 360 } 361 }