github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/services/m3dbnode/config/bootstrap.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package config
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  
    27  	"github.com/m3db/m3/src/dbnode/client"
    28  	"github.com/m3db/m3/src/dbnode/persist/fs"
    29  	"github.com/m3db/m3/src/dbnode/persist/fs/migration"
    30  	"github.com/m3db/m3/src/dbnode/storage"
    31  	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
    32  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper"
    33  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/commitlog"
    34  	bfs "github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/fs"
    35  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/peers"
    36  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/uninitialized"
    37  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/result"
    38  	"github.com/m3db/m3/src/dbnode/storage/index"
    39  	"github.com/m3db/m3/src/dbnode/storage/index/compaction"
    40  	"github.com/m3db/m3/src/dbnode/topology"
    41  	"github.com/m3db/m3/src/m3ninx/index/segment/fst"
    42  )
    43  
    44  var (
    45  	// defaultNumProcessorsPerCPU is the default number of processors per CPU.
    46  	defaultNumProcessorsPerCPU = 0.125
    47  
    48  	// default order in which bootstrappers are run
    49  	// (run in ascending order of precedence).
    50  	defaultOrderedBootstrappers = []string{
    51  		// Filesystem bootstrapping must be first.
    52  		bfs.FileSystemBootstrapperName,
    53  		// Peers and commitlog must come before the uninitialized topology bootrapping.
    54  		commitlog.CommitLogBootstrapperName,
    55  		peers.PeersBootstrapperName,
    56  		uninitialized.UninitializedTopologyBootstrapperName,
    57  	}
    58  
    59  	// bootstrapper order where peers is prefered over commitlog.
    60  	preferPeersOrderedBootstrappers = []string{
    61  		// Filesystem bootstrapping must be first.
    62  		bfs.FileSystemBootstrapperName,
    63  		// Prefer peers over commitlog.
    64  		peers.PeersBootstrapperName,
    65  		commitlog.CommitLogBootstrapperName,
    66  		uninitialized.UninitializedTopologyBootstrapperName,
    67  	}
    68  
    69  	// bootstrapper order where commitlog is excluded.
    70  	excludeCommitLogOrderedBootstrappers = []string{
    71  		// Filesystem bootstrapping must be first.
    72  		bfs.FileSystemBootstrapperName,
    73  		// Commitlog excluded.
    74  		peers.PeersBootstrapperName,
    75  		uninitialized.UninitializedTopologyBootstrapperName,
    76  	}
    77  
    78  	validBootstrapModes = []BootstrapMode{
    79  		DefaultBootstrapMode,
    80  		PreferPeersBootstrapMode,
    81  		ExcludeCommitLogBootstrapMode,
    82  	}
    83  
    84  	errReadBootstrapModeInvalid = errors.New("bootstrap mode invalid")
    85  )
    86  
    87  // BootstrapMode defines the mode in which bootstrappers are run.
    88  type BootstrapMode uint
    89  
    90  const (
    91  	// DefaultBootstrapMode executes bootstrappers in default order.
    92  	DefaultBootstrapMode BootstrapMode = iota
    93  	// PreferPeersBootstrapMode executes peers before commitlog bootstrapper.
    94  	PreferPeersBootstrapMode
    95  	// ExcludeCommitLogBootstrapMode executes all default bootstrappers except commitlog.
    96  	ExcludeCommitLogBootstrapMode
    97  )
    98  
    99  // MarshalYAML marshals a BootstrapMode.
   100  func (m *BootstrapMode) MarshalYAML() (interface{}, error) {
   101  	return m.String(), nil
   102  }
   103  
   104  // UnmarshalYAML unmarshals an BootstrapMode into a valid type from string.
   105  func (m *BootstrapMode) UnmarshalYAML(unmarshal func(interface{}) error) error {
   106  	var str string
   107  	if err := unmarshal(&str); err != nil {
   108  		return err
   109  	}
   110  
   111  	// If unspecified, use default mode.
   112  	if str == "" {
   113  		*m = DefaultBootstrapMode
   114  		return nil
   115  	}
   116  
   117  	for _, valid := range validBootstrapModes {
   118  		if str == valid.String() {
   119  			*m = valid
   120  			return nil
   121  		}
   122  	}
   123  	return fmt.Errorf("invalid BootstrapMode '%s' valid types are: %s",
   124  		str, validBootstrapModes)
   125  }
   126  
   127  // String returns the bootstrap mode as a string
   128  func (m BootstrapMode) String() string {
   129  	switch m {
   130  	case DefaultBootstrapMode:
   131  		return "default"
   132  	case PreferPeersBootstrapMode:
   133  		return "prefer_peers"
   134  	case ExcludeCommitLogBootstrapMode:
   135  		return "exclude_commitlog"
   136  	}
   137  	return "unknown"
   138  }
   139  
   140  // BootstrapConfiguration specifies the config for bootstrappers.
   141  type BootstrapConfiguration struct {
   142  	// BootstrapMode defines the mode in which bootstrappers are run.
   143  	BootstrapMode *BootstrapMode `yaml:"mode"`
   144  
   145  	// Filesystem bootstrapper configuration.
   146  	Filesystem *BootstrapFilesystemConfiguration `yaml:"filesystem"`
   147  
   148  	// Commitlog bootstrapper configuration.
   149  	Commitlog *BootstrapCommitlogConfiguration `yaml:"commitlog"`
   150  
   151  	// Peers bootstrapper configuration.
   152  	Peers *BootstrapPeersConfiguration `yaml:"peers"`
   153  
   154  	// CacheSeriesMetadata determines whether individual bootstrappers cache
   155  	// series metadata across all calls (namespaces / shards / blocks).
   156  	CacheSeriesMetadata *bool `yaml:"cacheSeriesMetadata"`
   157  
   158  	// IndexSegmentConcurrency determines the concurrency for building index
   159  	// segments.
   160  	IndexSegmentConcurrency *int `yaml:"indexSegmentConcurrency"`
   161  
   162  	// Verify specifies verification checks.
   163  	Verify *BootstrapVerifyConfiguration `yaml:"verify"`
   164  }
   165  
   166  // VerifyOrDefault returns verify configuration or default.
   167  func (bsc BootstrapConfiguration) VerifyOrDefault() BootstrapVerifyConfiguration {
   168  	if bsc.Verify == nil {
   169  		return BootstrapVerifyConfiguration{}
   170  	}
   171  
   172  	return *bsc.Verify
   173  }
   174  
   175  // BootstrapVerifyConfiguration outlines verification checks to enable
   176  // during a bootstrap.
   177  type BootstrapVerifyConfiguration struct {
   178  	VerifyIndexSegments *bool `yaml:"verifyIndexSegments"`
   179  }
   180  
   181  // VerifyIndexSegmentsOrDefault returns whether to verify index segments
   182  // or use default value.
   183  func (c BootstrapVerifyConfiguration) VerifyIndexSegmentsOrDefault() bool {
   184  	if c.VerifyIndexSegments == nil {
   185  		return false
   186  	}
   187  
   188  	return *c.VerifyIndexSegments
   189  }
   190  
   191  // BootstrapFilesystemConfiguration specifies config for the fs bootstrapper.
   192  type BootstrapFilesystemConfiguration struct {
   193  	// DeprecatedNumProcessorsPerCPU is the number of processors per CPU.
   194  	// TODO: Remove, this is deprecated since BootstrapDataNumProcessors() is
   195  	// no longer actually used anywhere.
   196  	DeprecatedNumProcessorsPerCPU float64 `yaml:"numProcessorsPerCPU" validate:"min=0.0"`
   197  
   198  	// Migration configuration specifies what version, if any, existing data filesets should be migrated to
   199  	// if necessary.
   200  	Migration *BootstrapMigrationConfiguration `yaml:"migration"`
   201  }
   202  
   203  func (c BootstrapFilesystemConfiguration) migration() BootstrapMigrationConfiguration {
   204  	if cfg := c.Migration; cfg != nil {
   205  		return *cfg
   206  	}
   207  	return BootstrapMigrationConfiguration{}
   208  }
   209  
   210  func newDefaultBootstrapFilesystemConfiguration() BootstrapFilesystemConfiguration {
   211  	return BootstrapFilesystemConfiguration{
   212  		Migration: &BootstrapMigrationConfiguration{},
   213  	}
   214  }
   215  
   216  // BootstrapMigrationConfiguration specifies configuration for data migrations during bootstrapping.
   217  type BootstrapMigrationConfiguration struct {
   218  	// TargetMigrationVersion indicates that we should attempt to upgrade filesets to
   219  	// what’s expected of the specified version.
   220  	TargetMigrationVersion migration.MigrationVersion `yaml:"targetMigrationVersion"`
   221  
   222  	// Concurrency sets the number of concurrent workers performing migrations.
   223  	Concurrency int `yaml:"concurrency"`
   224  }
   225  
   226  // NewOptions generates migration.Options from the configuration.
   227  func (m BootstrapMigrationConfiguration) NewOptions() migration.Options {
   228  	opts := migration.NewOptions().SetTargetMigrationVersion(m.TargetMigrationVersion)
   229  
   230  	if m.Concurrency > 0 {
   231  		opts = opts.SetConcurrency(m.Concurrency)
   232  	}
   233  
   234  	return opts
   235  }
   236  
   237  // BootstrapCommitlogConfiguration specifies config for the commitlog bootstrapper.
   238  type BootstrapCommitlogConfiguration struct {
   239  	// ReturnUnfulfilledForCorruptCommitLogFiles controls whether the commitlog bootstrapper
   240  	// will return unfulfilled for all shard time ranges when it encounters a corrupt commit
   241  	// file. Note that regardless of this value, the commitlog bootstrapper will still try and
   242  	// read all the uncorrupted commitlog files and return as much data as it can, but setting
   243  	// this to true allows the node to attempt a repair if the peers bootstrapper is configured
   244  	// after the commitlog bootstrapper.
   245  	ReturnUnfulfilledForCorruptCommitLogFiles bool `yaml:"returnUnfulfilledForCorruptCommitLogFiles"`
   246  }
   247  
   248  func newDefaultBootstrapCommitlogConfiguration() BootstrapCommitlogConfiguration {
   249  	return BootstrapCommitlogConfiguration{
   250  		ReturnUnfulfilledForCorruptCommitLogFiles: commitlog.DefaultReturnUnfulfilledForCorruptCommitLogFiles,
   251  	}
   252  }
   253  
   254  // BootstrapPeersConfiguration specifies config for the peers bootstrapper.
   255  type BootstrapPeersConfiguration struct {
   256  	// StreamShardConcurrency controls how many shards in parallel to stream
   257  	// for in memory data being streamed between peers (most recent block).
   258  	// Defaults to: numCPU.
   259  	StreamShardConcurrency *int `yaml:"streamShardConcurrency"`
   260  	// StreamPersistShardConcurrency controls how many shards in parallel to stream
   261  	// for historical data being streamed between peers (historical blocks).
   262  	// Defaults to: numCPU / 2.
   263  	StreamPersistShardConcurrency *int `yaml:"streamPersistShardConcurrency"`
   264  	// StreamPersistShardFlushConcurrency controls how many shards in parallel to flush
   265  	// for historical data being streamed between peers (historical blocks).
   266  	// Defaults to: 1.
   267  	StreamPersistShardFlushConcurrency *int `yaml:"streamPersistShardFlushConcurrency"`
   268  }
   269  
   270  // New creates a bootstrap process based on the bootstrap configuration.
   271  func (bsc BootstrapConfiguration) New(
   272  	rsOpts result.Options,
   273  	opts storage.Options,
   274  	topoMapProvider topology.MapProvider,
   275  	origin topology.Host,
   276  	adminClient client.AdminClient,
   277  ) (bootstrap.ProcessProvider, error) {
   278  	idxOpts := opts.IndexOptions()
   279  	compactor, err := compaction.NewCompactor(idxOpts.MetadataArrayPool(),
   280  		index.MetadataArrayPoolCapacity,
   281  		idxOpts.SegmentBuilderOptions(),
   282  		idxOpts.FSTSegmentOptions(),
   283  		compaction.CompactorOptions{
   284  			FSTWriterOptions: &fst.WriterOptions{
   285  				// DisableRegistry is set to true to trade a larger FST size
   286  				// for a faster FST compaction since we want to reduce the end
   287  				// to end latency for time to first index a metric.
   288  				DisableRegistry: true,
   289  			},
   290  		})
   291  	if err != nil {
   292  		return nil, err
   293  	}
   294  
   295  	var (
   296  		bs                   bootstrap.BootstrapperProvider
   297  		fsOpts               = opts.CommitLogOptions().FilesystemOptions()
   298  		orderedBootstrappers = bsc.orderedBootstrappers()
   299  	)
   300  	// Start from the end of the list because the bootstrappers are ordered by precedence in descending order.
   301  	// I.e. each bootstrapper wraps the preceding bootstrapper, and so the outer-most bootstrapper is run first.
   302  	for i := len(orderedBootstrappers) - 1; i >= 0; i-- {
   303  		switch orderedBootstrappers[i] {
   304  		case bootstrapper.NoOpAllBootstrapperName:
   305  			bs = bootstrapper.NewNoOpAllBootstrapperProvider()
   306  		case bootstrapper.NoOpNoneBootstrapperName:
   307  			bs = bootstrapper.NewNoOpNoneBootstrapperProvider()
   308  		case bfs.FileSystemBootstrapperName:
   309  			fsCfg := bsc.filesystemConfig()
   310  			fsbOpts := bfs.NewOptions().
   311  				SetInstrumentOptions(opts.InstrumentOptions()).
   312  				SetResultOptions(rsOpts).
   313  				SetFilesystemOptions(fsOpts).
   314  				SetIndexOptions(opts.IndexOptions()).
   315  				SetPersistManager(opts.PersistManager()).
   316  				SetIndexClaimsManager(opts.IndexClaimsManager()).
   317  				SetCompactor(compactor).
   318  				SetRuntimeOptionsManager(opts.RuntimeOptionsManager()).
   319  				SetIdentifierPool(opts.IdentifierPool()).
   320  				SetMigrationOptions(fsCfg.migration().NewOptions()).
   321  				SetStorageOptions(opts).
   322  				SetIndexSegmentsVerify(bsc.VerifyOrDefault().VerifyIndexSegmentsOrDefault())
   323  			if v := bsc.IndexSegmentConcurrency; v != nil {
   324  				fsbOpts = fsbOpts.SetIndexSegmentConcurrency(*v)
   325  			}
   326  			if err := fsbOpts.Validate(); err != nil {
   327  				return nil, err
   328  			}
   329  			bs, err = bfs.NewFileSystemBootstrapperProvider(fsbOpts, bs)
   330  			if err != nil {
   331  				return nil, err
   332  			}
   333  		case commitlog.CommitLogBootstrapperName:
   334  			cCfg := bsc.commitlogConfig()
   335  			cOpts := commitlog.NewOptions().
   336  				SetResultOptions(rsOpts).
   337  				SetCommitLogOptions(opts.CommitLogOptions()).
   338  				SetRuntimeOptionsManager(opts.RuntimeOptionsManager()).
   339  				SetReturnUnfulfilledForCorruptCommitLogFiles(cCfg.ReturnUnfulfilledForCorruptCommitLogFiles)
   340  			if err := cOpts.Validate(); err != nil {
   341  				return nil, err
   342  			}
   343  			inspection, err := fs.InspectFilesystem(fsOpts)
   344  			if err != nil {
   345  				return nil, err
   346  			}
   347  			bs, err = commitlog.NewCommitLogBootstrapperProvider(cOpts, inspection, bs)
   348  			if err != nil {
   349  				return nil, err
   350  			}
   351  		case peers.PeersBootstrapperName:
   352  			pCfg := bsc.peersConfig()
   353  			pOpts := peers.NewOptions().
   354  				SetResultOptions(rsOpts).
   355  				SetFilesystemOptions(fsOpts).
   356  				SetIndexOptions(opts.IndexOptions()).
   357  				SetAdminClient(adminClient).
   358  				SetPersistManager(opts.PersistManager()).
   359  				SetIndexClaimsManager(opts.IndexClaimsManager()).
   360  				SetCompactor(compactor).
   361  				SetRuntimeOptionsManager(opts.RuntimeOptionsManager()).
   362  				SetContextPool(opts.ContextPool())
   363  			if pCfg.StreamShardConcurrency != nil {
   364  				pOpts = pOpts.SetDefaultShardConcurrency(*pCfg.StreamShardConcurrency)
   365  			}
   366  			if pCfg.StreamPersistShardConcurrency != nil {
   367  				pOpts = pOpts.SetShardPersistenceConcurrency(*pCfg.StreamPersistShardConcurrency)
   368  			}
   369  			if pCfg.StreamPersistShardFlushConcurrency != nil {
   370  				pOpts = pOpts.SetShardPersistenceFlushConcurrency(*pCfg.StreamPersistShardFlushConcurrency)
   371  			}
   372  			if v := bsc.IndexSegmentConcurrency; v != nil {
   373  				pOpts = pOpts.SetIndexSegmentConcurrency(*v)
   374  			}
   375  			if err := pOpts.Validate(); err != nil {
   376  				return nil, err
   377  			}
   378  			bs, err = peers.NewPeersBootstrapperProvider(pOpts, bs)
   379  			if err != nil {
   380  				return nil, err
   381  			}
   382  		case uninitialized.UninitializedTopologyBootstrapperName:
   383  			uOpts := uninitialized.NewOptions().
   384  				SetResultOptions(rsOpts).
   385  				SetInstrumentOptions(opts.InstrumentOptions())
   386  			if err := uOpts.Validate(); err != nil {
   387  				return nil, err
   388  			}
   389  			bs = uninitialized.NewUninitializedTopologyBootstrapperProvider(uOpts, bs)
   390  		default:
   391  			return nil, fmt.Errorf("unknown bootstrapper: %s", orderedBootstrappers[i])
   392  		}
   393  	}
   394  
   395  	providerOpts := bootstrap.NewProcessOptions().
   396  		SetTopologyMapProvider(topoMapProvider).
   397  		SetOrigin(origin)
   398  	if bsc.CacheSeriesMetadata != nil {
   399  		providerOpts = providerOpts.SetCacheSeriesMetadata(*bsc.CacheSeriesMetadata)
   400  	}
   401  	return bootstrap.NewProcessProvider(bs, providerOpts, rsOpts, fsOpts)
   402  }
   403  
   404  func (bsc BootstrapConfiguration) filesystemConfig() BootstrapFilesystemConfiguration {
   405  	if cfg := bsc.Filesystem; cfg != nil {
   406  		return *cfg
   407  	}
   408  	return newDefaultBootstrapFilesystemConfiguration()
   409  }
   410  
   411  func (bsc BootstrapConfiguration) commitlogConfig() BootstrapCommitlogConfiguration {
   412  	if cfg := bsc.Commitlog; cfg != nil {
   413  		return *cfg
   414  	}
   415  	return newDefaultBootstrapCommitlogConfiguration()
   416  }
   417  
   418  func (bsc BootstrapConfiguration) peersConfig() BootstrapPeersConfiguration {
   419  	if cfg := bsc.Peers; cfg != nil {
   420  		return *cfg
   421  	}
   422  	return BootstrapPeersConfiguration{}
   423  }
   424  
   425  func (bsc BootstrapConfiguration) orderedBootstrappers() []string {
   426  	if bsc.BootstrapMode != nil {
   427  		switch *bsc.BootstrapMode {
   428  		case DefaultBootstrapMode:
   429  			return defaultOrderedBootstrappers
   430  		case PreferPeersBootstrapMode:
   431  			return preferPeersOrderedBootstrappers
   432  		case ExcludeCommitLogBootstrapMode:
   433  			return excludeCommitLogOrderedBootstrappers
   434  		}
   435  	}
   436  	return defaultOrderedBootstrappers
   437  }