github.com/m3db/m3@v1.5.0/src/dbnode/integration/bootstrap_retries_test.go (about)

     1  // +build integration
     2  
     3  // Copyright (c) 2021 Uber Technologies, Inc.
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be included in
    13  // all copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  // THE SOFTWARE.
    22  
    23  package integration
    24  
    25  import (
    26  	"errors"
    27  	"strings"
    28  	"testing"
    29  	"time"
    30  
    31  	"github.com/stretchr/testify/assert"
    32  	"github.com/stretchr/testify/require"
    33  	"github.com/uber-go/tally"
    34  
    35  	"github.com/m3db/m3/src/cluster/shard"
    36  	"github.com/m3db/m3/src/dbnode/integration/generate"
    37  	"github.com/m3db/m3/src/dbnode/namespace"
    38  	"github.com/m3db/m3/src/dbnode/retention"
    39  	"github.com/m3db/m3/src/dbnode/storage"
    40  	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
    41  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper"
    42  	"github.com/m3db/m3/src/x/context"
    43  	"github.com/m3db/m3/src/x/ident"
    44  )
    45  
    46  func TestBootstrapRetriesDueToError(t *testing.T) {
    47  	// Setup the test bootstrapper to only proceed when a signal is sent.
    48  	signalCh := make(chan bool)
    49  
    50  	setup, testScope := bootstrapRetryTestSetup(t, func(
    51  		ctx context.Context,
    52  		namespaces bootstrap.Namespaces,
    53  		cache bootstrap.Cache,
    54  	) (bootstrap.NamespaceResults, error) {
    55  		shouldError := <-signalCh
    56  		if shouldError {
    57  			return bootstrap.NamespaceResults{}, errors.New("error in bootstrapper")
    58  		}
    59  		// Mark all as fulfilled
    60  		bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide()
    61  		require.NoError(t, err)
    62  		return bs.Bootstrap(ctx, namespaces, cache)
    63  	})
    64  
    65  	go func() {
    66  		// Wait for server to get started by the main test method.
    67  		require.NoError(t, setup.WaitUntilServerIsUp())
    68  
    69  		// First bootstrap pass. Bootstrapper produces an error. Check if DB is not marked bootstrapped.
    70  		signalCh <- true
    71  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
    72  
    73  		// Bootstrap retry. Bootstrapper completes persist range without errors. Check if DB isn't
    74  		// marked as bootstrapped on the second pass.
    75  		signalCh <- false
    76  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
    77  
    78  		// Still bootstrap retry. Bootstrapper completes in-memory range without errors. DB finishes bootstrapping.
    79  		signalCh <- false
    80  	}()
    81  
    82  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
    83  	defer func() {
    84  		require.NoError(t, setup.StopServer())
    85  	}()
    86  
    87  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
    88  	assertRetryMetric(t, testScope, "other")
    89  }
    90  
    91  func TestBootstrapRetriesDueToObsoleteRanges(t *testing.T) {
    92  	// Setup the test bootstrapper to only proceed when a signal is sent.
    93  	signalCh := make(chan struct{})
    94  
    95  	setup, testScope := bootstrapRetryTestSetup(t, func(
    96  		ctx context.Context,
    97  		namespaces bootstrap.Namespaces,
    98  		cache bootstrap.Cache,
    99  	) (bootstrap.NamespaceResults, error) {
   100  		// read from signalCh twice so we could advance the clock exactly in between of those signals
   101  		<-signalCh
   102  		<-signalCh
   103  		bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide()
   104  		require.NoError(t, err)
   105  		return bs.Bootstrap(ctx, namespaces, cache)
   106  	})
   107  
   108  	go assertBootstrapRetry(t, setup, signalCh)
   109  
   110  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
   111  	defer func() {
   112  		require.NoError(t, setup.StopServer())
   113  	}()
   114  
   115  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
   116  	assertRetryMetric(t, testScope, "obsolete-ranges")
   117  }
   118  
   119  func TestNoOpenFilesWhenBootstrapRetriesDueToObsoleteRanges(t *testing.T) {
   120  	// Setup the test bootstrapper to only proceed when a signal is sent.
   121  	signalCh := make(chan struct{})
   122  
   123  	setup, testScope := bootstrapRetryTestSetup(t, func(
   124  		ctx context.Context,
   125  		namespaces bootstrap.Namespaces,
   126  		cache bootstrap.Cache,
   127  	) (bootstrap.NamespaceResults, error) {
   128  		// read from signalCh twice so we could advance the clock exactly in between of those signals
   129  		<-signalCh
   130  		<-signalCh
   131  		bs, err := bootstrapper.NewNoOpAllBootstrapperProvider().Provide()
   132  		require.NoError(t, err)
   133  		return bs.Bootstrap(ctx, namespaces, cache)
   134  	})
   135  
   136  	go assertBootstrapRetry(t, setup, signalCh)
   137  
   138  	// Write test data
   139  	now := setup.NowFn()()
   140  
   141  	fooSeries := generate.Series{
   142  		ID:   ident.StringID("foo"),
   143  		Tags: ident.NewTags(ident.StringTag("city", "new_york"), ident.StringTag("foo", "foo")),
   144  	}
   145  
   146  	barSeries := generate.Series{
   147  		ID:   ident.StringID("bar"),
   148  		Tags: ident.NewTags(ident.StringTag("city", "new_jersey")),
   149  	}
   150  
   151  	bazSeries := generate.Series{
   152  		ID:   ident.StringID("baz"),
   153  		Tags: ident.NewTags(ident.StringTag("city", "seattle")),
   154  	}
   155  
   156  	blockSize := 2 * time.Hour
   157  
   158  	ns1 := setup.Namespaces()[0]
   159  	seriesMaps := generate.BlocksByStart([]generate.BlockConfig{
   160  		{
   161  			IDs:       []string{fooSeries.ID.String()},
   162  			Tags:      fooSeries.Tags,
   163  			NumPoints: 100,
   164  			Start:     now.Add(-1 * blockSize),
   165  		},
   166  		{
   167  			IDs:       []string{barSeries.ID.String()},
   168  			Tags:      barSeries.Tags,
   169  			NumPoints: 100,
   170  			Start:     now.Add(-1 * blockSize),
   171  		},
   172  		{
   173  			IDs:       []string{fooSeries.ID.String()},
   174  			Tags:      fooSeries.Tags,
   175  			NumPoints: 100,
   176  			Start:     now.Add(1 * blockSize),
   177  		},
   178  		{
   179  			IDs:       []string{barSeries.ID.String()},
   180  			Tags:      barSeries.Tags,
   181  			NumPoints: 100,
   182  			Start:     now.Add(1 * blockSize),
   183  		},
   184  		{
   185  			IDs:       []string{fooSeries.ID.String()},
   186  			Tags:      fooSeries.Tags,
   187  			NumPoints: 50,
   188  			Start:     now,
   189  		},
   190  		{
   191  			IDs:       []string{bazSeries.ID.String()},
   192  			Tags:      bazSeries.Tags,
   193  			NumPoints: 50,
   194  			Start:     now,
   195  		},
   196  	})
   197  
   198  	require.NoError(t, writeTestDataToDiskWithIndex(ns1, setup, seriesMaps))
   199  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
   200  	defer func() {
   201  		require.NoError(t, setup.StopServerAndVerifyOpenFilesAreClosed())
   202  		setup.Close()
   203  	}()
   204  
   205  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
   206  	assertRetryMetric(t, testScope, "obsolete-ranges")
   207  }
   208  
   209  func TestBootstrapRetriesDueToUnfulfilledRanges(t *testing.T) {
   210  	// Setup the test bootstrapper to only proceed when a signal is sent.
   211  	signalCh := make(chan bool)
   212  
   213  	setup, testScope := bootstrapRetryTestSetup(t, func(
   214  		ctx context.Context,
   215  		namespaces bootstrap.Namespaces,
   216  		cache bootstrap.Cache,
   217  	) (bootstrap.NamespaceResults, error) {
   218  		var provider bootstrap.BootstrapperProvider
   219  		shouldUnfulfill := <-signalCh
   220  		if shouldUnfulfill {
   221  			provider = bootstrapper.NewNoOpNoneBootstrapperProvider()
   222  		} else {
   223  			provider = bootstrapper.NewNoOpAllBootstrapperProvider()
   224  		}
   225  		bs, err := provider.Provide()
   226  		require.NoError(t, err)
   227  		return bs.Bootstrap(ctx, namespaces, cache)
   228  	})
   229  
   230  	go func() {
   231  		// Wait for server to get started by the main test method.
   232  		require.NoError(t, setup.WaitUntilServerIsUp())
   233  
   234  		// First bootstrap pass. Bootstrap produces unfulfilled ranges for persist range.
   235  		// Check if DB is not marked bootstrapped.
   236  		signalCh <- true
   237  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   238  		// Still first bootstrap pass. Bootstrap produces unfulfilled ranges for in-memory range.
   239  		// Check if DB is not marked bootstrapped.
   240  		signalCh <- true
   241  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   242  
   243  		// Bootstrap retry. Bootstrapper completes persist range fulfilling everything.
   244  		// Check if DB isn't marked as bootstrapped on the second pass.
   245  		signalCh <- false
   246  		assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   247  
   248  		// Still bootstrap retry. Bootstrapper completes in-memory range fulfilling everything.
   249  		// DB finishes bootstrapping.
   250  		signalCh <- false
   251  	}()
   252  
   253  	require.NoError(t, setup.StartServer()) // Blocks until bootstrap is complete
   254  	defer func() {
   255  		require.NoError(t, setup.StopServer())
   256  	}()
   257  
   258  	assert.True(t, setup.DB().IsBootstrapped(), "database should be bootstrapped")
   259  
   260  	assertRetryMetric(t, testScope, "other")
   261  }
   262  
   263  func assertBootstrapRetry(t *testing.T, setup TestSetup, signalCh chan struct{}) {
   264  	// Wait for server to get started by the main test method.
   265  	require.NoError(t, setup.WaitUntilServerIsUp())
   266  
   267  	// First bootstrap pass, persist ranges. Check if DB is not marked bootstrapped and advance clock.
   268  	signalCh <- struct{}{}
   269  	assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   270  	setup.SetNowFn(setup.NowFn()().Add(2 * time.Hour))
   271  	signalCh <- struct{}{}
   272  
   273  	// Still first bootstrap pass, in-memory ranges. Due to advanced clock previously calculated
   274  	// ranges are obsolete. Check if DB is not marked bootstrapped.
   275  	signalCh <- struct{}{}
   276  	assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   277  	signalCh <- struct{}{}
   278  
   279  	// Bootstrap retry, persist ranges. Check if DB isn't marked as bootstrapped on the second pass.
   280  	signalCh <- struct{}{}
   281  	assert.False(t, setup.DB().IsBootstrapped(), "database should not yet be bootstrapped")
   282  	signalCh <- struct{}{}
   283  }
   284  
   285  type bootstrapFn = func(
   286  	ctx context.Context,
   287  	namespaces bootstrap.Namespaces,
   288  	cache bootstrap.Cache,
   289  ) (bootstrap.NamespaceResults, error)
   290  
   291  func bootstrapRetryTestSetup(t *testing.T, bootstrapFn bootstrapFn) (TestSetup, tally.TestScope) {
   292  	testScope := tally.NewTestScope("testScope", map[string]string{})
   293  
   294  	rOpts := retention.NewOptions().
   295  		SetRetentionPeriod(12 * time.Hour).
   296  		SetBufferPast(5 * time.Minute).
   297  		SetBufferFuture(5 * time.Minute)
   298  
   299  	ns1, err := namespace.NewMetadata(testNamespaces[0], namespace.NewOptions().SetRetentionOptions(rOpts))
   300  	require.NoError(t, err)
   301  	opts := NewTestOptions(t).
   302  		SetNamespaces([]namespace.Metadata{ns1}).
   303  		SetShardSetOptions(&TestShardSetOptions{
   304  			// Set all shards to initializing so bootstrap is
   305  			// retried on an obsolete range (which is not done
   306  			// if all shards are available and hence coming from disk).
   307  			ShardState: shard.Initializing,
   308  		})
   309  
   310  	setup, err := NewTestSetup(t, opts, nil, func(storageOpts storage.Options) storage.Options {
   311  		return storageOpts.SetInstrumentOptions(storageOpts.InstrumentOptions().SetMetricsScope(testScope))
   312  	})
   313  	require.NoError(t, err)
   314  	defer setup.Close()
   315  
   316  	var (
   317  		fsOpts = setup.StorageOpts().CommitLogOptions().FilesystemOptions()
   318  
   319  		bootstrapOpts          = newDefaulTestResultOptions(setup.StorageOpts())
   320  		bootstrapperSourceOpts = testBootstrapperSourceOptions{read: bootstrapFn}
   321  		processOpts            = bootstrap.NewProcessOptions().
   322  					SetTopologyMapProvider(setup).
   323  					SetOrigin(setup.Origin())
   324  	)
   325  	bootstrapOpts.SetInstrumentOptions(bootstrapOpts.InstrumentOptions().SetMetricsScope(testScope))
   326  	boostrapper := newTestBootstrapperSource(bootstrapperSourceOpts, bootstrapOpts, nil)
   327  
   328  	processProvider, err := bootstrap.NewProcessProvider(
   329  		boostrapper, processOpts, bootstrapOpts, fsOpts)
   330  	require.NoError(t, err)
   331  	setup.SetStorageOpts(setup.StorageOpts().SetBootstrapProcessProvider(processProvider))
   332  	return setup, testScope
   333  }
   334  
   335  func assertRetryMetric(t *testing.T, testScope tally.TestScope, expectedReason string) {
   336  	const (
   337  		metricName = "bootstrap-retries"
   338  		reasonTag  = "reason"
   339  	)
   340  	valuesByReason := make(map[string]int)
   341  	for _, counter := range testScope.Snapshot().Counters() {
   342  		if strings.Contains(counter.Name(), metricName) {
   343  			reason := ""
   344  			if r, ok := counter.Tags()[reasonTag]; ok {
   345  				reason = r
   346  			}
   347  			valuesByReason[reason] = int(counter.Value())
   348  		}
   349  	}
   350  
   351  	val, ok := valuesByReason[expectedReason]
   352  	if assert.True(t, ok, "missing metric for expected reason") {
   353  		assert.Equal(t, 1, val)
   354  	}
   355  	for r, val := range valuesByReason {
   356  		if r != expectedReason {
   357  			assert.Equal(t, 0, val)
   358  		}
   359  	}
   360  }