github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/bootstrap_retries_test.go (about)

     1  //go:build integration
     2  // +build integration
     3  
     4  // Copyright (c) 2021 Uber Technologies, Inc.
     5  //
     6  // Permission is hereby granted, free of charge, to any person obtaining a copy
     7  // of this software and associated documentation files (the "Software"), to deal
     8  // in the Software without restriction, including without limitation the rights
     9  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    10  // copies of the Software, and to permit persons to whom the Software is
    11  // furnished to do so, subject to the following conditions:
    12  //
    13  // The above copyright notice and this permission notice shall be included in
    14  // all copies or substantial portions of the Software.
    15  //
    16  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    17  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    18  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    19  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    20  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    21  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    22  // THE SOFTWARE.
    23  
    24  package integration
    25  
    26  import (
    27  	"errors"
    28  	"strings"
    29  	"testing"
    30  	"time"
    31  
    32  	"github.com/m3db/m3/src/cluster/shard"
    33  	"github.com/m3db/m3/src/dbnode/integration/generate"
    34  	"github.com/m3db/m3/src/dbnode/namespace"
    35  	"github.com/m3db/m3/src/dbnode/retention"
    36  	"github.com/m3db/m3/src/dbnode/storage"
    37  	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
    38  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper"
    39  	"github.com/m3db/m3/src/x/context"
    40  	"github.com/m3db/m3/src/x/ident"
    41  
    42  	"github.com/stretchr/testify/assert"
    43  	"github.com/stretchr/testify/require"
    44  	"github.com/uber-go/tally"
    45  )
    46  
    47  func TestBootstrapRetriesDueToError(t *testing.T) {
    48  	// Setup the test bootstrapper to only proceed when a signal is sent.
    49  	signalCh := make(chan bool)
    50  
    51  	setup, testScope := bootstrapRetryTestSetup(t, func(
    52  		ctx context.Context,
    53  		namespaces bootstrap.Namespaces,
    54  		cache bootstrap.Cache,
    55  	) (bootstrap.NamespaceResults, error) {
    56  		shouldError := <-signalCh
    57  		if shouldError {
    58  			return bootstrap.NamespaceResults{}, errors.New("error in bootstrapper")
    59  		}
    60  		// Mark all as fulfilled
    61  		bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide()
    62  		require.NoError(t, err)
    63  		return bs.Bootstrap(ctx, namespaces, cache)
    64  	})
    65  
    66  	go func() {
    67  		// Wait for server to get started by the main test method.
    68  		require.NoError(t, setup.WaitUntilServerIsUp())
    69  
    70  		// First bootstrap pass. Bootstrapper produces an error. Check if DB is not marked bootstrapped.
    71  		signalCh <- true
    72  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
    73  
    74  		// Bootstrap retry. Bootstrapper completes persist range without errors. Check if DB isn't
    75  		// marked as bootstrapped on the second pass.
    76  		signalCh <- false
    77  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
    78  
    79  		// Still bootstrap retry. Bootstrapper completes in-memory range without errors. DB finishes bootstrapping.
    80  		signalCh <- false
    81  	}()
    82  
    83  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
    84  	defer func() {
    85  		require.NoError(t, setup.StopServer())
    86  	}()
    87  
    88  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
    89  	assertRetryMetric(t, testScope, "other")
    90  }
    91  
    92  func TestBootstrapRetriesDueToObsoleteRanges(t *testing.T) {
    93  	// Setup the test bootstrapper to only proceed when a signal is sent.
    94  	signalCh := make(chan struct{})
    95  
    96  	setup, testScope := bootstrapRetryTestSetup(t, func(
    97  		ctx context.Context,
    98  		namespaces bootstrap.Namespaces,
    99  		cache bootstrap.Cache,
   100  	) (bootstrap.NamespaceResults, error) {
   101  		// read from signalCh twice so we could advance the clock exactly in between of those signals
   102  		<-signalCh
   103  		<-signalCh
   104  		bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide()
   105  		require.NoError(t, err)
   106  		return bs.Bootstrap(ctx, namespaces, cache)
   107  	})
   108  
   109  	go assertBootstrapRetry(t, setup, signalCh)
   110  
   111  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
   112  	defer func() {
   113  		require.NoError(t, setup.StopServer())
   114  	}()
   115  
   116  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
   117  	assertRetryMetric(t, testScope, "obsolete-ranges")
   118  }
   119  
   120  func TestNoOpenFilesWhenBootstrapRetriesDueToObsoleteRanges(t *testing.T) {
   121  	// Setup the test bootstrapper to only proceed when a signal is sent.
   122  	signalCh := make(chan struct{})
   123  
   124  	setup, testScope := bootstrapRetryTestSetup(t, func(
   125  		ctx context.Context,
   126  		namespaces bootstrap.Namespaces,
   127  		cache bootstrap.Cache,
   128  	) (bootstrap.NamespaceResults, error) {
   129  		// read from signalCh twice so we could advance the clock exactly in between of those signals
   130  		<-signalCh
   131  		<-signalCh
   132  		bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide()
   133  		require.NoError(t, err)
   134  		return bs.Bootstrap(ctx, namespaces, cache)
   135  	})
   136  
   137  	go assertBootstrapRetry(t, setup, signalCh)
   138  
   139  	// Write test data
   140  	now := setup.NowFn()()
   141  
   142  	fooSeries := generate.Series{
   143  		ID:   ident.StringID("foo"),
   144  		Tags: ident.NewTags(ident.StringTag("city", "new_york"), ident.StringTag("foo", "foo")),
   145  	}
   146  
   147  	barSeries := generate.Series{
   148  		ID:   ident.StringID("bar"),
   149  		Tags: ident.NewTags(ident.StringTag("city", "new_jersey")),
   150  	}
   151  
   152  	bazSeries := generate.Series{
   153  		ID:   ident.StringID("baz"),
   154  		Tags: ident.NewTags(ident.StringTag("city", "seattle")),
   155  	}
   156  
   157  	blockSize := 2 * time.Hour
   158  
   159  	ns1 := setup.Namespaces()[0]
   160  	seriesMaps := generate.BlocksByStart([]generate.BlockConfig{
   161  		{
   162  			IDs:       []string{fooSeries.ID.String()},
   163  			Tags:      fooSeries.Tags,
   164  			NumPoints: 100,
   165  			Start:     now.Add(-1 * blockSize),
   166  		},
   167  		{
   168  			IDs:       []string{barSeries.ID.String()},
   169  			Tags:      barSeries.Tags,
   170  			NumPoints: 100,
   171  			Start:     now.Add(-1 * blockSize),
   172  		},
   173  		{
   174  			IDs:       []string{fooSeries.ID.String()},
   175  			Tags:      fooSeries.Tags,
   176  			NumPoints: 100,
   177  			Start:     now.Add(1 * blockSize),
   178  		},
   179  		{
   180  			IDs:       []string{barSeries.ID.String()},
   181  			Tags:      barSeries.Tags,
   182  			NumPoints: 100,
   183  			Start:     now.Add(1 * blockSize),
   184  		},
   185  		{
   186  			IDs:       []string{fooSeries.ID.String()},
   187  			Tags:      fooSeries.Tags,
   188  			NumPoints: 50,
   189  			Start:     now,
   190  		},
   191  		{
   192  			IDs:       []string{bazSeries.ID.String()},
   193  			Tags:      bazSeries.Tags,
   194  			NumPoints: 50,
   195  			Start:     now,
   196  		},
   197  	})
   198  
   199  	require.NoError(t, writeTestDataToDiskWithIndex(ns1, setup, seriesMaps))
   200  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
   201  	defer func() {
   202  		require.NoError(t, setup.StopServerAndVerifyOpenFilesAreClosed())
   203  		setup.Close()
   204  	}()
   205  
   206  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
   207  	assertRetryMetric(t, testScope, "obsolete-ranges")
   208  }
   209  
   210  func TestBootstrapRetriesDueToUnfulfilledRanges(t *testing.T) {
   211  	// Setup the test bootstrapper to only proceed when a signal is sent.
   212  	signalCh := make(chan bool)
   213  
   214  	setup, testScope := bootstrapRetryTestSetup(t, func(
   215  		ctx context.Context,
   216  		namespaces bootstrap.Namespaces,
   217  		cache bootstrap.Cache,
   218  	) (bootstrap.NamespaceResults, error) {
   219  		var provider bootstrap.BootstrapperProvider
   220  		shouldUnfulfill := <-signalCh
   221  		if shouldUnfulfill {
   222  			provider = bootstrapper.NewNoOpNoneBootstrapperProvider()
   223  		} else {
   224  			provider = bootstrapper.NewNoOpAllBootstrapperProvider()
   225  		}
   226  		bs, err := provider.Provide()
   227  		require.NoError(t, err)
   228  		return bs.Bootstrap(ctx, namespaces, cache)
   229  	})
   230  
   231  	go func() {
   232  		// Wait for server to get started by the main test method.
   233  		require.NoError(t, setup.WaitUntilServerIsUp())
   234  
   235  		// First bootstrap pass. Bootstrap produces unfulfilled ranges for persist range.
   236  		// Check if DB is not marked bootstrapped.
   237  		signalCh <- true
   238  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   239  		// Still first bootstrap pass. Bootstrap produces unfulfilled ranges for in-memory range.
   240  		// Check if DB is not marked bootstrapped.
   241  		signalCh <- true
   242  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   243  
   244  		// Bootstrap retry. Bootstrapper completes persist range fulfilling everything.
   245  		// Check if DB isn't marked as bootstrapped on the second pass.
   246  		signalCh <- false
   247  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   248  
   249  		// Still bootstrap retry. Bootstrapper completes in-memory range fulfilling everything.
   250  		// DB finishes bootstrapping.
   251  		signalCh <- false
   252  	}()
   253  
   254  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
   255  	defer func() {
   256  		require.NoError(t, setup.StopServer())
   257  	}()
   258  
   259  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
   260  
   261  	assertRetryMetric(t, testScope, "other")
   262  }
   263  
   264  func assertBootstrapRetry(t *testing.T, setup TestSetup, signalCh chan struct{}) {
   265  	// Wait for server to get started by the main test method.
   266  	require.NoError(t, setup.WaitUntilServerIsUp())
   267  
   268  	// First bootstrap pass, persist ranges. Check if DB is not marked bootstrapped and advance clock.
   269  	signalCh <- struct{}{}
   270  	assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   271  	setup.SetNowFn(setup.NowFn()().Add(2 * time.Hour))
   272  	signalCh <- struct{}{}
   273  
   274  	// Still first bootstrap pass, in-memory ranges. Due to advanced clock previously calculated
   275  	// ranges are obsolete. Check if DB is not marked bootstrapped.
   276  	signalCh <- struct{}{}
   277  	assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   278  	signalCh <- struct{}{}
   279  
   280  	// Bootstrap retry, persist ranges. Check if DB isn't marked as bootstrapped on the second pass.
   281  	signalCh <- struct{}{}
   282  	assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   283  	signalCh <- struct{}{}
   284  }
   285  
   286  type bootstrapFn = func(
   287  	ctx context.Context,
   288  	namespaces bootstrap.Namespaces,
   289  	cache bootstrap.Cache,
   290  ) (bootstrap.NamespaceResults, error)
   291  
   292  func bootstrapRetryTestSetup(t *testing.T, bootstrapFn bootstrapFn) (TestSetup, tally.TestScope) {
   293  	testScope := tally.NewTestScope("testScope", map[string]string{})
   294  
   295  	rOpts := retention.NewOptions().
   296  		SetRetentionPeriod(12 * time.Hour).
   297  		SetBufferPast(5 * time.Minute).
   298  		SetBufferFuture(5 * time.Minute)
   299  
   300  	ns1, err := namespace.NewMetadata(testNamespaces[0], namespace.NewOptions().SetRetentionOptions(rOpts))
   301  	require.NoError(t, err)
   302  	opts := NewTestOptions(t).
   303  		SetNamespaces([]namespace.Metadata{ns1}).
   304  		SetShardSetOptions(&TestShardSetOptions{
   305  			// Set all shards to initializing so bootstrap is
   306  			// retried on an obsolete range (which is not done
   307  			// if all shards are available and hence coming from disk).
   308  			ShardState: shard.Initializing,
   309  		})
   310  
   311  	setup, err := NewTestSetup(t, opts, nil, func(storageOpts storage.Options) storage.Options {
   312  		return storageOpts.SetInstrumentOptions(storageOpts.InstrumentOptions().SetMetricsScope(testScope))
   313  	})
   314  	require.NoError(t, err)
   315  	defer setup.Close()
   316  
   317  	var (
   318  		fsOpts = setup.StorageOpts().CommitLogOptions().FilesystemOptions()
   319  
   320  		bootstrapOpts          = newDefaulTestResultOptions(setup.StorageOpts())
   321  		bootstrapperSourceOpts = testBootstrapperSourceOptions{read: bootstrapFn}
   322  		processOpts            = bootstrap.NewProcessOptions().
   323  					SetTopologyMapProvider(setup).
   324  					SetOrigin(setup.Origin())
   325  	)
   326  	bootstrapOpts.SetInstrumentOptions(bootstrapOpts.InstrumentOptions().SetMetricsScope(testScope))
   327  	boostrapper := newTestBootstrapperSource(bootstrapperSourceOpts, bootstrapOpts, nil)
   328  
   329  	processProvider, err := bootstrap.NewProcessProvider(
   330  		boostrapper, processOpts, bootstrapOpts, fsOpts)
   331  	require.NoError(t, err)
   332  	setup.SetStorageOpts(setup.StorageOpts().SetBootstrapProcessProvider(processProvider))
   333  	return setup, testScope
   334  }
   335  
   336  func assertRetryMetric(t *testing.T, testScope tally.TestScope, expectedReason string) {
   337  	const (
   338  		metricName = "bootstrap-retries"
   339  		reasonTag  = "reason"
   340  	)
   341  	valuesByReason := make(map[string]int)
   342  	for _, counter := range testScope.Snapshot().Counters() {
   343  		if strings.Contains(counter.Name(), metricName) {
   344  			reason := ""
   345  			if r, ok := counter.Tags()[reasonTag]; ok {
   346  				reason = r
   347  			}
   348  			valuesByReason[reason] = int(counter.Value())
   349  		}
   350  	}
   351  
   352  	val, ok := valuesByReason[expectedReason]
   353  	if assert.True(t, ok, "missing metric for expected reason") {
   354  		assert.Equal(t, 1, val)
   355  	}
   356  	for r, val := range valuesByReason {
   357  		if r != expectedReason {
   358  			assert.Equal(t, 0, val)
   359  		}
   360  	}
   361  }