github.com/m3db/m3@v1.5.0/src/dbnode/integration/bootstrap_retries_test.go (about) 1 // +build integration 2 3 // Copyright (c) 2021 Uber Technologies, Inc. 4 // 5 // Permission is hereby granted, free of charge, to any person obtaining a copy 6 // of this software and associated documentation files (the "Software"), to deal 7 // in the Software without restriction, including without limitation the rights 8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 // copies of the Software, and to permit persons to whom the Software is 10 // furnished to do so, subject to the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be included in 13 // all copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 // THE SOFTWARE. 22 23 package integration 24 25 import ( 26 "errors" 27 "strings" 28 "testing" 29 "time" 30 31 "github.com/stretchr/testify/assert" 32 "github.com/stretchr/testify/require" 33 "github.com/uber-go/tally" 34 35 "github.com/m3db/m3/src/cluster/shard" 36 "github.com/m3db/m3/src/dbnode/integration/generate" 37 "github.com/m3db/m3/src/dbnode/namespace" 38 "github.com/m3db/m3/src/dbnode/retention" 39 "github.com/m3db/m3/src/dbnode/storage" 40 "github.com/m3db/m3/src/dbnode/storage/bootstrap" 41 "github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper" 42 "github.com/m3db/m3/src/x/context" 43 "github.com/m3db/m3/src/x/ident" 44 ) 45 46 func TestBootstrapRetriesDueToError(t *testing.T) { 47 // Setup the test bootstrapper to only proceed when a signal is sent. 48 signalCh := make(chan bool) 49 50 setup, testScope := bootstrapRetryTestSetup(t, func( 51 ctx context.Context, 52 namespaces bootstrap.Namespaces, 53 cache bootstrap.Cache, 54 ) (bootstrap.NamespaceResults, error) { 55 shouldError := <-signalCh 56 if shouldError { 57 return bootstrap.NamespaceResults{}, errors.New("error in bootstrapper") 58 } 59 // Mark all as fulfilled 60 bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide() 61 require.NoError(t, err) 62 return bs.Bootstrap(ctx, namespaces, cache) 63 }) 64 65 go func() { 66 // Wait for server to get started by the main test method. 67 require.NoError(t, setup.WaitUntilServerIsUp()) 68 69 // First bootstrap pass. Bootstrapper produces an error. Check if DB is not marked bootstrapped. 70 signalCh <- true 71 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 72 73 // Bootstrap retry. Bootstrapper completes persist range without errors. Check if DB isn't 74 // marked as bootstrapped on the second pass. 75 signalCh <- false 76 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 77 78 // Still bootstrap retry. Bootstrapper completes in-memory range without errors. DB finishes bootstrapping. 79 signalCh <- false 80 }() 81 82 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 83 defer func() { 84 require.NoError(t, setup.StopServer()) 85 }() 86 87 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 88 assertRetryMetric(t, testScope, "other") 89 } 90 91 func TestBootstrapRetriesDueToObsoleteRanges(t *testing.T) { 92 // Setup the test bootstrapper to only proceed when a signal is sent. 93 signalCh := make(chan struct{}) 94 95 setup, testScope := bootstrapRetryTestSetup(t, func( 96 ctx context.Context, 97 namespaces bootstrap.Namespaces, 98 cache bootstrap.Cache, 99 ) (bootstrap.NamespaceResults, error) { 100 // read from signalCh twice so we could advance the clock exactly in between of those signals 101 <-signalCh 102 <-signalCh 103 bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide() 104 require.NoError(t, err) 105 return bs.Bootstrap(ctx, namespaces, cache) 106 }) 107 108 go assertBootstrapRetry(t, setup, signalCh) 109 110 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 111 defer func() { 112 require.NoError(t, setup.StopServer()) 113 }() 114 115 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 116 assertRetryMetric(t, testScope, "obsolete-ranges") 117 } 118 119 func TestNoOpenFilesWhenBootstrapRetriesDueToObsoleteRanges(t *testing.T) { 120 // Setup the test bootstrapper to only proceed when a signal is sent. 121 signalCh := make(chan struct{}) 122 123 setup, testScope := bootstrapRetryTestSetup(t, func( 124 ctx context.Context, 125 namespaces bootstrap.Namespaces, 126 cache bootstrap.Cache, 127 ) (bootstrap.NamespaceResults, error) { 128 // read from signalCh twice so we could advance the clock exactly in between of those signals 129 <-signalCh 130 <-signalCh 131 bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide() 132 require.NoError(t, err) 133 return bs.Bootstrap(ctx, namespaces, cache) 134 }) 135 136 go assertBootstrapRetry(t, setup, signalCh) 137 138 // Write test data 139 now := setup.NowFn()() 140 141 fooSeries := generate.Series{ 142 ID: ident.StringID("foo"), 143 Tags: ident.NewTags(ident.StringTag("city", "new_york"), ident.StringTag("foo", "foo")), 144 } 145 146 barSeries := generate.Series{ 147 ID: ident.StringID("bar"), 148 Tags: ident.NewTags(ident.StringTag("city", "new_jersey")), 149 } 150 151 bazSeries := generate.Series{ 152 ID: ident.StringID("baz"), 153 Tags: ident.NewTags(ident.StringTag("city", "seattle")), 154 } 155 156 blockSize := 2 * time.Hour 157 158 ns1 := setup.Namespaces()[0] 159 seriesMaps := generate.BlocksByStart([]generate.BlockConfig{ 160 { 161 IDs: []string{fooSeries.ID.String()}, 162 Tags: fooSeries.Tags, 163 NumPoints: 100, 164 Start: now.Add(-1 * blockSize), 165 }, 166 { 167 IDs: []string{barSeries.ID.String()}, 168 Tags: barSeries.Tags, 169 NumPoints: 100, 170 Start: now.Add(-1 * blockSize), 171 }, 172 { 173 IDs: []string{fooSeries.ID.String()}, 174 Tags: fooSeries.Tags, 175 NumPoints: 100, 176 Start: now.Add(1 * blockSize), 177 }, 178 { 179 IDs: []string{barSeries.ID.String()}, 180 Tags: barSeries.Tags, 181 NumPoints: 100, 182 Start: now.Add(1 * blockSize), 183 }, 184 { 185 IDs: []string{fooSeries.ID.String()}, 186 Tags: fooSeries.Tags, 187 NumPoints: 50, 188 Start: now, 189 }, 190 { 191 IDs: []string{bazSeries.ID.String()}, 192 Tags: bazSeries.Tags, 193 NumPoints: 50, 194 Start: now, 195 }, 196 }) 197 198 require.NoError(t, writeTestDataToDiskWithIndex(ns1, setup, seriesMaps)) 199 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 200 defer func() { 201 require.NoError(t, setup.StopServerAndVerifyOpenFilesAreClosed()) 202 setup.Close() 203 }() 204 205 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 206 assertRetryMetric(t, testScope, "obsolete-ranges") 207 } 208 209 func TestBootstrapRetriesDueToUnfulfilledRanges(t *testing.T) { 210 // Setup the test bootstrapper to only proceed when a signal is sent. 211 signalCh := make(chan bool) 212 213 setup, testScope := bootstrapRetryTestSetup(t, func( 214 ctx context.Context, 215 namespaces bootstrap.Namespaces, 216 cache bootstrap.Cache, 217 ) (bootstrap.NamespaceResults, error) { 218 var provider bootstrap.BootstrapperProvider 219 shouldUnfulfill := <-signalCh 220 if shouldUnfulfill { 221 provider = bootstrapper.NewNoOpNoneBootstrapperProvider() 222 } else { 223 provider = bootstrapper.NewNoOpAllBootstrapperProvider() 224 } 225 bs, err := provider.Provide() 226 require.NoError(t, err) 227 return bs.Bootstrap(ctx, namespaces, cache) 228 }) 229 230 go func() { 231 // Wait for server to get started by the main test method. 232 require.NoError(t, setup.WaitUntilServerIsUp()) 233 234 // First bootstrap pass. Bootstrap produces unfulfilled ranges for persist range. 235 // Check if DB is not marked bootstrapped. 236 signalCh <- true 237 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 238 // Still first bootstrap pass. Bootstrap produces unfulfilled ranges for in-memory range. 239 // Check if DB is not marked bootstrapped. 240 signalCh <- true 241 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 242 243 // Bootstrap retry. Bootstrapper completes persist range fulfilling everything. 244 // Check if DB isn't marked as bootstrapped on the second pass. 245 signalCh <- false 246 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 247 248 // Still bootstrap retry. Bootstrapper completes in-memory range fulfilling everything. 249 // DB finishes bootstrapping. 250 signalCh <- false 251 }() 252 253 require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete 254 defer func() { 255 require.NoError(t, setup.StopServer()) 256 }() 257 258 assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped") 259 260 assertRetryMetric(t, testScope, "other") 261 } 262 263 func assertBootstrapRetry(t *testing.T, setup TestSetup, signalCh chan struct{}) { 264 // Wait for server to get started by the main test method. 265 require.NoError(t, setup.WaitUntilServerIsUp()) 266 267 // First bootstrap pass, persist ranges. Check if DB is not marked bootstrapped and advance clock. 268 signalCh <- struct{}{} 269 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 270 setup.SetNowFn(setup.NowFn()().Add(2 * time.Hour)) 271 signalCh <- struct{}{} 272 273 // Still first bootstrap pass, in-memory ranges. Due to advanced clock previously calculated 274 // ranges are obsolete. Check if DB is not marked bootstrapped. 275 signalCh <- struct{}{} 276 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 277 signalCh <- struct{}{} 278 279 // Bootstrap retry, persist ranges. Check if DB isn't marked as bootstrapped on the second pass. 280 signalCh <- struct{}{} 281 assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped") 282 signalCh <- struct{}{} 283 } 284 285 type bootstrapFn = func( 286 ctx context.Context, 287 namespaces bootstrap.Namespaces, 288 cache bootstrap.Cache, 289 ) (bootstrap.NamespaceResults, error) 290 291 func bootstrapRetryTestSetup(t *testing.T, bootstrapFn bootstrapFn) (TestSetup, tally.TestScope) { 292 testScope := tally.NewTestScope("testScope", map[string]string{}) 293 294 rOpts := retention.NewOptions(). 295 SetRetentionPeriod(12 * time.Hour). 296 SetBufferPast(5 * time.Minute). 297 SetBufferFuture(5 * time.Minute) 298 299 ns1, err := namespace.NewMetadata(testNamespaces[0], namespace.NewOptions().SetRetentionOptions(rOpts)) 300 require.NoError(t, err) 301 opts := NewTestOptions(t). 302 SetNamespaces([]namespace.Metadata{ns1}). 303 SetShardSetOptions(&TestShardSetOptions{ 304 // Set all shards to initializing so bootstrap is 305 // retried on an obsolete range (which is not done 306 // if all shards are available and hence coming from disk). 307 ShardState: shard.Initializing, 308 }) 309 310 setup, err := NewTestSetup(t, opts, nil, func(storageOpts storage.Options) storage.Options { 311 return storageOpts.SetInstrumentOptions(storageOpts.InstrumentOptions().SetMetricsScope(testScope)) 312 }) 313 require.NoError(t, err) 314 defer setup.Close() 315 316 var ( 317 fsOpts = setup.StorageOpts().CommitLogOptions().FilesystemOptions() 318 319 bootstrapOpts = newDefaulTestResultOptions(setup.StorageOpts()) 320 bootstrapperSourceOpts = testBootstrapperSourceOptions{read: bootstrapFn} 321 processOpts = bootstrap.NewProcessOptions(). 322 SetTopologyMapProvider(setup). 323 SetOrigin(setup.Origin()) 324 ) 325 bootstrapOpts.SetInstrumentOptions(bootstrapOpts.InstrumentOptions().SetMetricsScope(testScope)) 326 boostrapper := newTestBootstrapperSource(bootstrapperSourceOpts, bootstrapOpts, nil) 327 328 processProvider, err := bootstrap.NewProcessProvider( 329 boostrapper, processOpts, bootstrapOpts, fsOpts) 330 require.NoError(t, err) 331 setup.SetStorageOpts(setup.StorageOpts().SetBootstrapProcessProvider(processProvider)) 332 return setup, testScope 333 } 334 335 func assertRetryMetric(t *testing.T, testScope tally.TestScope, expectedReason string) { 336 const ( 337 metricName = "bootstrap-retries" 338 reasonTag = "reason" 339 ) 340 valuesByReason := make(map[string]int) 341 for _, counter := range testScope.Snapshot().Counters() { 342 if strings.Contains(counter.Name(), metricName) { 343 reason := "" 344 if r, ok := counter.Tags()[reasonTag]; ok { 345 reason = r 346 } 347 valuesByReason[reason] = int(counter.Value()) 348 } 349 } 350 351 val, ok := valuesByReason[expectedReason] 352 if assert.True(t, ok, "missing metric for expected reason") { 353 assert.Equal(t, 1, val) 354 } 355 for r, val := range valuesByReason { 356 if r != expectedReason { 357 assert.Equal(t, 0, val) 358 } 359 } 360 }