github.com/m3db/m3@v1.5.0/src/cmd/services/m3dbnode/config/bootstrap.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package config
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  
    27  	"github.com/m3db/m3/src/dbnode/client"
    28  	"github.com/m3db/m3/src/dbnode/persist/fs"
    29  	"github.com/m3db/m3/src/dbnode/persist/fs/migration"
    30  	"github.com/m3db/m3/src/dbnode/storage"
    31  	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
    32  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper"
    33  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/commitlog"
    34  	bfs "github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/fs"
    35  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/peers"
    36  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/uninitialized"
    37  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/result"
    38  	"github.com/m3db/m3/src/dbnode/storage/index"
    39  	"github.com/m3db/m3/src/dbnode/storage/index/compaction"
    40  	"github.com/m3db/m3/src/dbnode/topology"
    41  	"github.com/m3db/m3/src/m3ninx/index/segment/fst"
    42  )
    43  
    44  var (
    45  	// defaultNumProcessorsPerCPU is the default number of processors per CPU.
    46  	defaultNumProcessorsPerCPU = 0.125
    47  
    48  	// default order in which bootstrappers are run
    49  	// (run in ascending order of precedence).
    50  	defaultOrderedBootstrappers = []string{
    51  		// Filesystem bootstrapping must be first.
    52  		bfs.FileSystemBootstrapperName,
    53  		// Peers and commitlog must come before the uninitialized topology bootrapping.
    54  		commitlog.CommitLogBootstrapperName,
    55  		peers.PeersBootstrapperName,
    56  		uninitialized.UninitializedTopologyBootstrapperName,
    57  	}
    58  
    59  	// bootstrapper order where peers is prefered over commitlog.
    60  	preferPeersOrderedBootstrappers = []string{
    61  		// Filesystem bootstrapping must be first.
    62  		bfs.FileSystemBootstrapperName,
    63  		// Prefer peers over commitlog.
    64  		peers.PeersBootstrapperName,
    65  		commitlog.CommitLogBootstrapperName,
    66  		uninitialized.UninitializedTopologyBootstrapperName,
    67  	}
    68  
    69  	// bootstrapper order where commitlog is excluded.
    70  	excludeCommitLogOrderedBootstrappers = []string{
    71  		// Filesystem bootstrapping must be first.
    72  		bfs.FileSystemBootstrapperName,
    73  		// Commitlog excluded.
    74  		peers.PeersBootstrapperName,
    75  		uninitialized.UninitializedTopologyBootstrapperName,
    76  	}
    77  
    78  	validBootstrapModes = []BootstrapMode{
    79  		DefaultBootstrapMode,
    80  		PreferPeersBootstrapMode,
    81  		ExcludeCommitLogBootstrapMode,
    82  	}
    83  
    84  	errReadBootstrapModeInvalid = errors.New("bootstrap mode invalid")
    85  )
    86  
    87  // BootstrapMode defines the mode in which bootstrappers are run.
    88  type BootstrapMode uint
    89  
    90  const (
    91  	// DefaultBootstrapMode executes bootstrappers in default order.
    92  	DefaultBootstrapMode BootstrapMode = iota
    93  	// PreferPeersBootstrapMode executes peers before commitlog bootstrapper.
    94  	PreferPeersBootstrapMode
    95  	// ExcludeCommitLogBootstrapMode executes all default bootstrappers except commitlog.
    96  	ExcludeCommitLogBootstrapMode
    97  )
    98  
    99  // UnmarshalYAML unmarshals an BootstrapMode into a valid type from string.
   100  func (m *BootstrapMode) UnmarshalYAML(unmarshal func(interface{}) error) error {
   101  	var str string
   102  	if err := unmarshal(&str); err != nil {
   103  		return err
   104  	}
   105  
   106  	// If unspecified, use default mode.
   107  	if str == "" {
   108  		*m = DefaultBootstrapMode
   109  		return nil
   110  	}
   111  
   112  	for _, valid := range validBootstrapModes {
   113  		if str == valid.String() {
   114  			*m = valid
   115  			return nil
   116  		}
   117  	}
   118  	return fmt.Errorf("invalid BootstrapMode '%s' valid types are: %s",
   119  		str, validBootstrapModes)
   120  }
   121  
   122  // String returns the bootstrap mode as a string
   123  func (m BootstrapMode) String() string {
   124  	switch m {
   125  	case DefaultBootstrapMode:
   126  		return "default"
   127  	case PreferPeersBootstrapMode:
   128  		return "prefer_peers"
   129  	case ExcludeCommitLogBootstrapMode:
   130  		return "exclude_commitlog"
   131  	}
   132  	return "unknown"
   133  }
   134  
   135  // BootstrapConfiguration specifies the config for bootstrappers.
   136  type BootstrapConfiguration struct {
   137  	// BootstrapMode defines the mode in which bootstrappers are run.
   138  	BootstrapMode *BootstrapMode `yaml:"mode"`
   139  
   140  	// Filesystem bootstrapper configuration.
   141  	Filesystem *BootstrapFilesystemConfiguration `yaml:"filesystem"`
   142  
   143  	// Commitlog bootstrapper configuration.
   144  	Commitlog *BootstrapCommitlogConfiguration `yaml:"commitlog"`
   145  
   146  	// Peers bootstrapper configuration.
   147  	Peers *BootstrapPeersConfiguration `yaml:"peers"`
   148  
   149  	// CacheSeriesMetadata determines whether individual bootstrappers cache
   150  	// series metadata across all calls (namespaces / shards / blocks).
   151  	CacheSeriesMetadata *bool `yaml:"cacheSeriesMetadata"`
   152  
   153  	// IndexSegmentConcurrency determines the concurrency for building index
   154  	// segments.
   155  	IndexSegmentConcurrency *int `yaml:"indexSegmentConcurrency"`
   156  
   157  	// Verify specifies verification checks.
   158  	Verify *BootstrapVerifyConfiguration `yaml:"verify"`
   159  }
   160  
   161  // VerifyOrDefault returns verify configuration or default.
   162  func (bsc BootstrapConfiguration) VerifyOrDefault() BootstrapVerifyConfiguration {
   163  	if bsc.Verify == nil {
   164  		return BootstrapVerifyConfiguration{}
   165  	}
   166  
   167  	return *bsc.Verify
   168  }
   169  
   170  // BootstrapVerifyConfiguration outlines verification checks to enable
   171  // during a bootstrap.
   172  type BootstrapVerifyConfiguration struct {
   173  	VerifyIndexSegments *bool `yaml:"verifyIndexSegments"`
   174  }
   175  
   176  // VerifyIndexSegmentsOrDefault returns whether to verify index segments
   177  // or use default value.
   178  func (c BootstrapVerifyConfiguration) VerifyIndexSegmentsOrDefault() bool {
   179  	if c.VerifyIndexSegments == nil {
   180  		return false
   181  	}
   182  
   183  	return *c.VerifyIndexSegments
   184  }
   185  
   186  // BootstrapFilesystemConfiguration specifies config for the fs bootstrapper.
   187  type BootstrapFilesystemConfiguration struct {
   188  	// DeprecatedNumProcessorsPerCPU is the number of processors per CPU.
   189  	// TODO: Remove, this is deprecated since BootstrapDataNumProcessors() is
   190  	// no longer actually used anywhere.
   191  	DeprecatedNumProcessorsPerCPU float64 `yaml:"numProcessorsPerCPU" validate:"min=0.0"`
   192  
   193  	// Migration configuration specifies what version, if any, existing data filesets should be migrated to
   194  	// if necessary.
   195  	Migration *BootstrapMigrationConfiguration `yaml:"migration"`
   196  }
   197  
   198  func (c BootstrapFilesystemConfiguration) migration() BootstrapMigrationConfiguration {
   199  	if cfg := c.Migration; cfg != nil {
   200  		return *cfg
   201  	}
   202  	return BootstrapMigrationConfiguration{}
   203  }
   204  
   205  func newDefaultBootstrapFilesystemConfiguration() BootstrapFilesystemConfiguration {
   206  	return BootstrapFilesystemConfiguration{
   207  		Migration: &BootstrapMigrationConfiguration{},
   208  	}
   209  }
   210  
   211  // BootstrapMigrationConfiguration specifies configuration for data migrations during bootstrapping.
   212  type BootstrapMigrationConfiguration struct {
   213  	// TargetMigrationVersion indicates that we should attempt to upgrade filesets to
   214  	// what’s expected of the specified version.
   215  	TargetMigrationVersion migration.MigrationVersion `yaml:"targetMigrationVersion"`
   216  
   217  	// Concurrency sets the number of concurrent workers performing migrations.
   218  	Concurrency int `yaml:"concurrency"`
   219  }
   220  
   221  // NewOptions generates migration.Options from the configuration.
   222  func (m BootstrapMigrationConfiguration) NewOptions() migration.Options {
   223  	opts := migration.NewOptions().SetTargetMigrationVersion(m.TargetMigrationVersion)
   224  
   225  	if m.Concurrency > 0 {
   226  		opts = opts.SetConcurrency(m.Concurrency)
   227  	}
   228  
   229  	return opts
   230  }
   231  
   232  // BootstrapCommitlogConfiguration specifies config for the commitlog bootstrapper.
   233  type BootstrapCommitlogConfiguration struct {
   234  	// ReturnUnfulfilledForCorruptCommitLogFiles controls whether the commitlog bootstrapper
   235  	// will return unfulfilled for all shard time ranges when it encounters a corrupt commit
   236  	// file. Note that regardless of this value, the commitlog bootstrapper will still try and
   237  	// read all the uncorrupted commitlog files and return as much data as it can, but setting
   238  	// this to true allows the node to attempt a repair if the peers bootstrapper is configured
   239  	// after the commitlog bootstrapper.
   240  	ReturnUnfulfilledForCorruptCommitLogFiles bool `yaml:"returnUnfulfilledForCorruptCommitLogFiles"`
   241  }
   242  
   243  func newDefaultBootstrapCommitlogConfiguration() BootstrapCommitlogConfiguration {
   244  	return BootstrapCommitlogConfiguration{
   245  		ReturnUnfulfilledForCorruptCommitLogFiles: commitlog.DefaultReturnUnfulfilledForCorruptCommitLogFiles,
   246  	}
   247  }
   248  
   249  // BootstrapPeersConfiguration specifies config for the peers bootstrapper.
   250  type BootstrapPeersConfiguration struct {
   251  	// StreamShardConcurrency controls how many shards in parallel to stream
   252  	// for in memory data being streamed between peers (most recent block).
   253  	// Defaults to: numCPU.
   254  	StreamShardConcurrency *int `yaml:"streamShardConcurrency"`
   255  	// StreamPersistShardConcurrency controls how many shards in parallel to stream
   256  	// for historical data being streamed between peers (historical blocks).
   257  	// Defaults to: numCPU / 2.
   258  	StreamPersistShardConcurrency *int `yaml:"streamPersistShardConcurrency"`
   259  	// StreamPersistShardFlushConcurrency controls how many shards in parallel to flush
   260  	// for historical data being streamed between peers (historical blocks).
   261  	// Defaults to: 1.
   262  	StreamPersistShardFlushConcurrency *int `yaml:"streamPersistShardFlushConcurrency"`
   263  }
   264  
   265  // New creates a bootstrap process based on the bootstrap configuration.
   266  func (bsc BootstrapConfiguration) New(
   267  	rsOpts result.Options,
   268  	opts storage.Options,
   269  	topoMapProvider topology.MapProvider,
   270  	origin topology.Host,
   271  	adminClient client.AdminClient,
   272  ) (bootstrap.ProcessProvider, error) {
   273  	idxOpts := opts.IndexOptions()
   274  	compactor, err := compaction.NewCompactor(idxOpts.MetadataArrayPool(),
   275  		index.MetadataArrayPoolCapacity,
   276  		idxOpts.SegmentBuilderOptions(),
   277  		idxOpts.FSTSegmentOptions(),
   278  		compaction.CompactorOptions{
   279  			FSTWriterOptions: &fst.WriterOptions{
   280  				// DisableRegistry is set to true to trade a larger FST size
   281  				// for a faster FST compaction since we want to reduce the end
   282  				// to end latency for time to first index a metric.
   283  				DisableRegistry: true,
   284  			},
   285  		})
   286  	if err != nil {
   287  		return nil, err
   288  	}
   289  
   290  	var (
   291  		bs                   bootstrap.BootstrapperProvider
   292  		fsOpts               = opts.CommitLogOptions().FilesystemOptions()
   293  		orderedBootstrappers = bsc.orderedBootstrappers()
   294  	)
   295  	// Start from the end of the list because the bootstrappers are ordered by precedence in descending order.
   296  	// I.e. each bootstrapper wraps the preceding bootstrapper, and so the outer-most bootstrapper is run first.
   297  	for i := len(orderedBootstrappers) - 1; i >= 0; i-- {
   298  		switch orderedBootstrappers[i] {
   299  		case bootstrapper.NoOpAllBootstrapperName:
   300  			bs = bootstrapper.NewNoOpAllBootstrapperProvider()
   301  		case bootstrapper.NoOpNoneBootstrapperName:
   302  			bs = bootstrapper.NewNoOpNoneBootstrapperProvider()
   303  		case bfs.FileSystemBootstrapperName:
   304  			fsCfg := bsc.filesystemConfig()
   305  			fsbOpts := bfs.NewOptions().
   306  				SetInstrumentOptions(opts.InstrumentOptions()).
   307  				SetResultOptions(rsOpts).
   308  				SetFilesystemOptions(fsOpts).
   309  				SetIndexOptions(opts.IndexOptions()).
   310  				SetPersistManager(opts.PersistManager()).
   311  				SetIndexClaimsManager(opts.IndexClaimsManager()).
   312  				SetCompactor(compactor).
   313  				SetRuntimeOptionsManager(opts.RuntimeOptionsManager()).
   314  				SetIdentifierPool(opts.IdentifierPool()).
   315  				SetMigrationOptions(fsCfg.migration().NewOptions()).
   316  				SetStorageOptions(opts).
   317  				SetIndexSegmentsVerify(bsc.VerifyOrDefault().VerifyIndexSegmentsOrDefault())
   318  			if v := bsc.IndexSegmentConcurrency; v != nil {
   319  				fsbOpts = fsbOpts.SetIndexSegmentConcurrency(*v)
   320  			}
   321  			if err := fsbOpts.Validate(); err != nil {
   322  				return nil, err
   323  			}
   324  			bs, err = bfs.NewFileSystemBootstrapperProvider(fsbOpts, bs)
   325  			if err != nil {
   326  				return nil, err
   327  			}
   328  		case commitlog.CommitLogBootstrapperName:
   329  			cCfg := bsc.commitlogConfig()
   330  			cOpts := commitlog.NewOptions().
   331  				SetResultOptions(rsOpts).
   332  				SetCommitLogOptions(opts.CommitLogOptions()).
   333  				SetRuntimeOptionsManager(opts.RuntimeOptionsManager()).
   334  				SetReturnUnfulfilledForCorruptCommitLogFiles(cCfg.ReturnUnfulfilledForCorruptCommitLogFiles)
   335  			if err := cOpts.Validate(); err != nil {
   336  				return nil, err
   337  			}
   338  			inspection, err := fs.InspectFilesystem(fsOpts)
   339  			if err != nil {
   340  				return nil, err
   341  			}
   342  			bs, err = commitlog.NewCommitLogBootstrapperProvider(cOpts, inspection, bs)
   343  			if err != nil {
   344  				return nil, err
   345  			}
   346  		case peers.PeersBootstrapperName:
   347  			pCfg := bsc.peersConfig()
   348  			pOpts := peers.NewOptions().
   349  				SetResultOptions(rsOpts).
   350  				SetFilesystemOptions(fsOpts).
   351  				SetIndexOptions(opts.IndexOptions()).
   352  				SetAdminClient(adminClient).
   353  				SetPersistManager(opts.PersistManager()).
   354  				SetIndexClaimsManager(opts.IndexClaimsManager()).
   355  				SetCompactor(compactor).
   356  				SetRuntimeOptionsManager(opts.RuntimeOptionsManager()).
   357  				SetContextPool(opts.ContextPool())
   358  			if pCfg.StreamShardConcurrency != nil {
   359  				pOpts = pOpts.SetDefaultShardConcurrency(*pCfg.StreamShardConcurrency)
   360  			}
   361  			if pCfg.StreamPersistShardConcurrency != nil {
   362  				pOpts = pOpts.SetShardPersistenceConcurrency(*pCfg.StreamPersistShardConcurrency)
   363  			}
   364  			if pCfg.StreamPersistShardFlushConcurrency != nil {
   365  				pOpts = pOpts.SetShardPersistenceFlushConcurrency(*pCfg.StreamPersistShardFlushConcurrency)
   366  			}
   367  			if v := bsc.IndexSegmentConcurrency; v != nil {
   368  				pOpts = pOpts.SetIndexSegmentConcurrency(*v)
   369  			}
   370  			if err := pOpts.Validate(); err != nil {
   371  				return nil, err
   372  			}
   373  			bs, err = peers.NewPeersBootstrapperProvider(pOpts, bs)
   374  			if err != nil {
   375  				return nil, err
   376  			}
   377  		case uninitialized.UninitializedTopologyBootstrapperName:
   378  			uOpts := uninitialized.NewOptions().
   379  				SetResultOptions(rsOpts).
   380  				SetInstrumentOptions(opts.InstrumentOptions())
   381  			if err := uOpts.Validate(); err != nil {
   382  				return nil, err
   383  			}
   384  			bs = uninitialized.NewUninitializedTopologyBootstrapperProvider(uOpts, bs)
   385  		default:
   386  			return nil, fmt.Errorf("unknown bootstrapper: %s", orderedBootstrappers[i])
   387  		}
   388  	}
   389  
   390  	providerOpts := bootstrap.NewProcessOptions().
   391  		SetTopologyMapProvider(topoMapProvider).
   392  		SetOrigin(origin)
   393  	if bsc.CacheSeriesMetadata != nil {
   394  		providerOpts = providerOpts.SetCacheSeriesMetadata(*bsc.CacheSeriesMetadata)
   395  	}
   396  	return bootstrap.NewProcessProvider(bs, providerOpts, rsOpts, fsOpts)
   397  }
   398  
   399  func (bsc BootstrapConfiguration) filesystemConfig() BootstrapFilesystemConfiguration {
   400  	if cfg := bsc.Filesystem; cfg != nil {
   401  		return *cfg
   402  	}
   403  	return newDefaultBootstrapFilesystemConfiguration()
   404  }
   405  
   406  func (bsc BootstrapConfiguration) commitlogConfig() BootstrapCommitlogConfiguration {
   407  	if cfg := bsc.Commitlog; cfg != nil {
   408  		return *cfg
   409  	}
   410  	return newDefaultBootstrapCommitlogConfiguration()
   411  }
   412  
   413  func (bsc BootstrapConfiguration) peersConfig() BootstrapPeersConfiguration {
   414  	if cfg := bsc.Peers; cfg != nil {
   415  		return *cfg
   416  	}
   417  	return BootstrapPeersConfiguration{}
   418  }
   419  
   420  func (bsc BootstrapConfiguration) orderedBootstrappers() []string {
   421  	if bsc.BootstrapMode != nil {
   422  		switch *bsc.BootstrapMode {
   423  		case DefaultBootstrapMode:
   424  			return defaultOrderedBootstrappers
   425  		case PreferPeersBootstrapMode:
   426  			return preferPeersOrderedBootstrappers
   427  		case ExcludeCommitLogBootstrapMode:
   428  			return excludeCommitLogOrderedBootstrappers
   429  		}
   430  	}
   431  	return defaultOrderedBootstrappers
   432  }