github.com/m3db/m3@v1.5.0/src/dbnode/server/server.go

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  // Package server contains the code to run the dbnode server.
    22  package server
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"math"
    30  	"net/http"
    31  	"os"
    32  	"path"
    33  	"runtime"
    34  	"runtime/debug"
    35  	"strings"
    36  	"sync"
    37  	"time"
    38  
    39  	clusterclient "github.com/m3db/m3/src/cluster/client"
    40  	"github.com/m3db/m3/src/cluster/client/etcd"
    41  	"github.com/m3db/m3/src/cluster/generated/proto/commonpb"
    42  	"github.com/m3db/m3/src/cluster/generated/proto/kvpb"
    43  	"github.com/m3db/m3/src/cluster/kv"
    44  	"github.com/m3db/m3/src/cluster/placement"
    45  	"github.com/m3db/m3/src/cluster/placementhandler"
    46  	"github.com/m3db/m3/src/cluster/placementhandler/handleroptions"
    47  	"github.com/m3db/m3/src/cmd/services/m3dbnode/config"
    48  	"github.com/m3db/m3/src/dbnode/client"
    49  	"github.com/m3db/m3/src/dbnode/encoding"
    50  	"github.com/m3db/m3/src/dbnode/encoding/m3tsz"
    51  	"github.com/m3db/m3/src/dbnode/encoding/proto"
    52  	"github.com/m3db/m3/src/dbnode/environment"
    53  	"github.com/m3db/m3/src/dbnode/kvconfig"
    54  	"github.com/m3db/m3/src/dbnode/namespace"
    55  	hjcluster "github.com/m3db/m3/src/dbnode/network/server/httpjson/cluster"
    56  	hjnode "github.com/m3db/m3/src/dbnode/network/server/httpjson/node"
    57  	"github.com/m3db/m3/src/dbnode/network/server/tchannelthrift"
    58  	ttcluster "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/cluster"
    59  	ttnode "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/node"
    60  	"github.com/m3db/m3/src/dbnode/persist/fs"
    61  	"github.com/m3db/m3/src/dbnode/persist/fs/commitlog"
    62  	"github.com/m3db/m3/src/dbnode/ratelimit"
    63  	"github.com/m3db/m3/src/dbnode/retention"
    64  	m3dbruntime "github.com/m3db/m3/src/dbnode/runtime"
    65  	"github.com/m3db/m3/src/dbnode/sharding"
    66  	"github.com/m3db/m3/src/dbnode/storage"
    67  	"github.com/m3db/m3/src/dbnode/storage/block"
    68  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/result"
    69  	"github.com/m3db/m3/src/dbnode/storage/cluster"
    70  	"github.com/m3db/m3/src/dbnode/storage/index"
    71  	"github.com/m3db/m3/src/dbnode/storage/limits"
    72  	"github.com/m3db/m3/src/dbnode/storage/limits/permits"
    73  	"github.com/m3db/m3/src/dbnode/storage/series"
    74  	"github.com/m3db/m3/src/dbnode/topology"
    75  	"github.com/m3db/m3/src/dbnode/ts"
    76  	"github.com/m3db/m3/src/dbnode/ts/writes"
    77  	xtchannel "github.com/m3db/m3/src/dbnode/x/tchannel"
    78  	"github.com/m3db/m3/src/dbnode/x/xio"
    79  	"github.com/m3db/m3/src/dbnode/x/xpool"
    80  	m3ninxindex "github.com/m3db/m3/src/m3ninx/index"
    81  	"github.com/m3db/m3/src/m3ninx/postings"
    82  	"github.com/m3db/m3/src/m3ninx/postings/roaring"
    83  	"github.com/m3db/m3/src/x/clock"
    84  	xconfig "github.com/m3db/m3/src/x/config"
    85  	xcontext "github.com/m3db/m3/src/x/context"
    86  	xdebug "github.com/m3db/m3/src/x/debug"
    87  	extdebug "github.com/m3db/m3/src/x/debug/ext"
    88  	xdocs "github.com/m3db/m3/src/x/docs"
    89  	"github.com/m3db/m3/src/x/ident"
    90  	"github.com/m3db/m3/src/x/instrument"
    91  	"github.com/m3db/m3/src/x/mmap"
    92  	xos "github.com/m3db/m3/src/x/os"
    93  	"github.com/m3db/m3/src/x/pool"
    94  	"github.com/m3db/m3/src/x/serialize"
    95  
    96  	apachethrift "github.com/apache/thrift/lib/go/thrift"
    97  	"github.com/m3dbx/vellum/levenshtein"
    98  	"github.com/m3dbx/vellum/levenshtein2"
    99  	"github.com/m3dbx/vellum/regexp"
   100  	"github.com/opentracing/opentracing-go"
   101  	"github.com/uber-go/tally"
   102  	"github.com/uber/tchannel-go"
   103  	"go.etcd.io/etcd/server/v3/embed"
   104  	"go.uber.org/zap"
   105  )
   106  
   107  const (
   108  	bootstrapConfigInitTimeout       = 10 * time.Second
   109  	serverGracefulCloseTimeout       = 10 * time.Second
   110  	debugServerGracefulCloseTimeout  = 2 * time.Second
   111  	bgProcessLimitInterval           = 10 * time.Second
   112  	maxBgProcessLimitMonitorDuration = 5 * time.Minute
   113  	cpuProfileDuration               = 5 * time.Second
   114  	filePathPrefixLockFile           = ".lock"
   115  	defaultServiceName               = "m3dbnode"
   116  	skipRaiseProcessLimitsEnvVar     = "SKIP_PROCESS_LIMITS_RAISE"
   117  	skipRaiseProcessLimitsEnvVarTrue = "true"
   118  	mmapReporterMetricName           = "mmap-mapped-bytes"
   119  	mmapReporterTagName              = "map-name"
   120  )
   121  
    122  // RunOptions provides options for running the server,
    123  // remaining backwards compatible so long as fields are only ever added.
   124  type RunOptions struct {
   125  	// ConfigFile is the YAML configuration file to use to run the server.
   126  	ConfigFile string
   127  
   128  	// Config is an alternate way to provide configuration and will be used
   129  	// instead of parsing ConfigFile if ConfigFile is not specified.
   130  	Config config.DBConfiguration
   131  
   132  	// BootstrapCh is a channel to listen on to be notified of bootstrap.
   133  	BootstrapCh chan<- struct{}
   134  
   135  	// EmbeddedKVCh is a channel to listen on to be notified that the embedded KV has bootstrapped.
   136  	EmbeddedKVCh chan<- struct{}
   137  
   138  	// ClientCh is a channel to listen on to share the same m3db client that this server uses.
   139  	ClientCh chan<- client.Client
   140  
   141  	// ClusterClientCh is a channel to listen on to share the same m3 cluster client that this server uses.
   142  	ClusterClientCh chan<- clusterclient.Client
   143  
   144  	// KVStoreCh is a channel to listen on to share the same m3 kv store client that this server uses.
   145  	KVStoreCh chan<- kv.Store
   146  
   147  	// InterruptCh is a programmatic interrupt channel to supply to
    148  	// interrupt and shut down the server.
   149  	InterruptCh <-chan error
   150  
    151  	// ShutdownCh is an optional channel to supply if interested in receiving a
    152  	// notification that the server has shut down (buffer it: the send is non-blocking).
   153  	ShutdownCh chan<- struct{}
   154  
   155  	// CustomOptions are custom options to apply to the session.
   156  	CustomOptions []client.CustomAdminOption
   157  
   158  	// Transform is a function to transform the Options.
   159  	Transform storage.OptionTransform
   160  
   161  	// StorageOptions are additional storage options.
   162  	StorageOptions StorageOptions
   163  
   164  	// CustomBuildTags are additional tags to be added to the instrument build
   165  	// reporter.
   166  	CustomBuildTags map[string]string
   167  }
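
         // A minimal sketch of programmatic use (hypothetical caller, not part of
         // this file): Run blocks until interrupted, so it is typically started in
         // a goroutine, with buffered channels supplied for the notifications
         // documented above.
         //
         //	interruptCh := make(chan error, 1)
         //	bootstrapCh := make(chan struct{}, 1)
         //	shutdownCh := make(chan struct{}, 1)
         //	go server.Run(server.RunOptions{
         //		ConfigFile:  "/etc/m3dbnode/m3dbnode.yml", // hypothetical path
         //		BootstrapCh: bootstrapCh,
         //		InterruptCh: interruptCh,
         //		ShutdownCh:  shutdownCh,
         //	})
         //	<-bootstrapCh                                  // database bootstrapped
         //	interruptCh <- errors.New("programmatic stop") // request shutdown
         //	<-shutdownCh                                   // teardown complete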
   168  
   169  // Run runs the server programmatically given a filename for the
   170  // configuration file.
   171  func Run(runOpts RunOptions) {
   172  	var cfg config.DBConfiguration
   173  	if runOpts.ConfigFile != "" {
   174  		var rootCfg config.Configuration
   175  		if err := xconfig.LoadFile(&rootCfg, runOpts.ConfigFile, xconfig.Options{}); err != nil {
   176  			// NB(r): Use fmt.Fprintf(os.Stderr, ...) to avoid etcd.SetGlobals()
    177  			// sending stdlib "log" output to a black hole. Don't remove without good reason.
   178  			fmt.Fprintf(os.Stderr, "unable to load %s: %v", runOpts.ConfigFile, err)
   179  			os.Exit(1)
   180  		}
   181  
   182  		cfg = *rootCfg.DB
   183  	} else {
   184  		cfg = runOpts.Config
   185  	}
   186  
   187  	err := cfg.Validate()
   188  	if err != nil {
   189  		// NB(r): Use fmt.Fprintf(os.Stderr, ...) to avoid etcd.SetGlobals()
    190  		// sending stdlib "log" output to a black hole. Don't remove without good reason.
   191  		fmt.Fprintf(os.Stderr, "error initializing config defaults and validating config: %v", err)
   192  		os.Exit(1)
   193  	}
   194  
   195  	logger, err := cfg.LoggingOrDefault().BuildLogger()
   196  	if err != nil {
   197  		// NB(r): Use fmt.Fprintf(os.Stderr, ...) to avoid etcd.SetGlobals()
    198  		// sending stdlib "log" output to a black hole. Don't remove without good reason.
   199  		fmt.Fprintf(os.Stderr, "unable to create logger: %v", err)
   200  		os.Exit(1)
   201  	}
   202  
   203  	// NB(nate): Register shutdown notification defer function first so that
   204  	// it's the last defer to fire before terminating. This allows other defer methods
   205  	// that clean up resources to execute first.
   206  	if runOpts.ShutdownCh != nil {
   207  		defer func() {
   208  			select {
   209  			case runOpts.ShutdownCh <- struct{}{}:
   210  				break
   211  			default:
   212  				logger.Warn("could not send shutdown notification as channel was full")
   213  			}
   214  		}()
   215  	}
   216  
   217  	interruptOpts := xos.NewInterruptOptions()
   218  	if runOpts.InterruptCh != nil {
   219  		interruptOpts.InterruptCh = runOpts.InterruptCh
   220  	}
   221  	intWatchCancel := xos.WatchForInterrupt(logger, interruptOpts)
   222  	defer intWatchCancel()
   223  
   224  	defer logger.Sync()
   225  
   226  	cfg.Debug.SetRuntimeValues(logger)
   227  
   228  	xconfig.WarnOnDeprecation(cfg, logger)
   229  
   230  	// By default attempt to raise process limits, which is a benign operation.
   231  	skipRaiseLimits := strings.TrimSpace(os.Getenv(skipRaiseProcessLimitsEnvVar))
   232  	if skipRaiseLimits != skipRaiseProcessLimitsEnvVarTrue {
   233  		// Raise fd limits to nr_open system limit
   234  		result, err := xos.RaiseProcessNoFileToNROpen()
   235  		if err != nil {
   236  			logger.Warn("unable to raise rlimit", zap.Error(err))
   237  		} else {
   238  			logger.Info("raised rlimit no file fds limit",
   239  				zap.Bool("required", result.RaisePerformed),
   240  				zap.Uint64("sysNROpenValue", result.NROpenValue),
   241  				zap.Uint64("noFileMaxValue", result.NoFileMaxValue),
   242  				zap.Uint64("noFileCurrValue", result.NoFileCurrValue))
   243  		}
   244  	}
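
         	// Note for operators: deployments that already manage file-descriptor
         	// limits externally (e.g. via systemd or a container runtime) can skip
         	// the raise attempt above by exporting SKIP_PROCESS_LIMITS_RAISE=true
         	// in the process environment, per the constants at the top of this file.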
   245  
   246  	// Parse file and directory modes
   247  	newFileMode, err := cfg.Filesystem.ParseNewFileMode()
   248  	if err != nil {
   249  		logger.Fatal("could not parse new file mode", zap.Error(err))
   250  	}
   251  
   252  	newDirectoryMode, err := cfg.Filesystem.ParseNewDirectoryMode()
   253  	if err != nil {
   254  		logger.Fatal("could not parse new directory mode", zap.Error(err))
   255  	}
   256  
    257  	// Obtain a lock on `filePathPrefix`, or exit if another process already holds it.
    258  	// The lock consists of a lock file (on the file system) and a lock in memory.
    259  	// When the process exits gracefully, both the lock file and the in-memory lock
    260  	// are removed. If the process exits ungracefully, only the in-memory lock is
    261  	// removed; the lock file remains on the file system. When a dbnode starts after
    262  	// an ungraceful stop, it can still acquire the lock even though the lock file exists.
   263  	lockPath := path.Join(cfg.Filesystem.FilePathPrefixOrDefault(), filePathPrefixLockFile)
   264  	fslock, err := createAndAcquireLockfile(lockPath, newDirectoryMode)
   265  	if err != nil {
   266  		logger.Fatal("could not acquire lock", zap.String("path", lockPath), zap.Error(err))
   267  	}
   268  	// nolint: errcheck
   269  	defer fslock.releaseLockfile()
   270  
   271  	go bgValidateProcessLimits(logger)
   272  	debug.SetGCPercent(cfg.GCPercentageOrDefault())
   273  
   274  	defaultServeMux := http.NewServeMux()
   275  	scope, _, _, err := cfg.MetricsOrDefault().NewRootScopeAndReporters(
   276  		instrument.NewRootScopeAndReportersOptions{
   277  			PrometheusDefaultServeMux: defaultServeMux,
   278  		})
   279  	if err != nil {
   280  		logger.Fatal("could not connect to metrics", zap.Error(err))
   281  	}
   282  
   283  	hostID, err := cfg.HostIDOrDefault().Resolve()
   284  	if err != nil {
   285  		logger.Fatal("could not resolve local host ID", zap.Error(err))
   286  	}
   287  
   288  	var (
   289  		tracer      opentracing.Tracer
   290  		traceCloser io.Closer
   291  	)
   292  
   293  	if cfg.Tracing == nil {
   294  		tracer = opentracing.NoopTracer{}
   295  		logger.Info("tracing disabled; set `tracing.backend` to enable")
   296  	} else {
   297  		// setup tracer
   298  		serviceName := cfg.Tracing.ServiceName
   299  		if serviceName == "" {
   300  			serviceName = defaultServiceName
   301  		}
   302  		tracer, traceCloser, err = cfg.Tracing.NewTracer(serviceName, scope.SubScope("jaeger"), logger)
   303  		if err != nil {
   304  			tracer = opentracing.NoopTracer{}
   305  			logger.Warn("could not initialize tracing; using no-op tracer instead",
   306  				zap.String("service", serviceName), zap.Error(err))
   307  		} else {
   308  			defer traceCloser.Close()
   309  			logger.Info("tracing enabled", zap.String("service", serviceName))
   310  		}
   311  	}
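
         	// Hedged aside on configuration: together with the log message above,
         	// the jaeger sub-scope suggests a YAML stanza roughly like
         	// `tracing: {serviceName: ..., backend: jaeger}`; the exact keys belong
         	// to the tracing config package and are an assumption here rather than
         	// something this file defines.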
   312  
   313  	// Presence of KV server config indicates embedded etcd cluster
   314  	discoveryConfig := cfg.DiscoveryOrDefault()
   315  	envConfig, err := discoveryConfig.EnvironmentConfig(hostID)
   316  	if err != nil {
   317  		logger.Fatal("could not get env config from discovery config", zap.Error(err))
   318  	}
   319  
   320  	if envConfig.SeedNodes == nil {
   321  		logger.Info("no seed nodes set, using dedicated etcd cluster")
   322  	} else {
   323  		// Default etcd client clusters if not set already
   324  		service, err := envConfig.Services.SyncCluster()
   325  		if err != nil {
   326  			logger.Fatal("invalid cluster configuration", zap.Error(err))
   327  		}
   328  
   329  		clusters := service.Service.ETCDClusters
   330  		seedNodes := envConfig.SeedNodes.InitialCluster
   331  		if len(clusters) == 0 {
   332  			endpoints, err := config.InitialClusterEndpoints(seedNodes)
   333  			if err != nil {
   334  				logger.Fatal("unable to create etcd clusters", zap.Error(err))
   335  			}
   336  
   337  			zone := service.Service.Zone
   338  
   339  			logger.Info("using seed nodes etcd cluster",
   340  				zap.String("zone", zone), zap.Strings("endpoints", endpoints))
   341  			service.Service.ETCDClusters = []etcd.ClusterConfig{{
   342  				Zone:      zone,
   343  				Endpoints: endpoints,
   344  			}}
   345  		}
   346  
   347  		seedNodeHostIDs := make([]string, 0, len(seedNodes))
   348  		for _, entry := range seedNodes {
   349  			seedNodeHostIDs = append(seedNodeHostIDs, entry.HostID)
   350  		}
   351  		logger.Info("resolving seed node configuration",
   352  			zap.String("hostID", hostID), zap.Strings("seedNodeHostIDs", seedNodeHostIDs),
   353  		)
   354  
   355  		if !config.IsSeedNode(seedNodes, hostID) {
   356  			logger.Info("not a seed node, using cluster seed nodes")
   357  		} else {
   358  			logger.Info("seed node, starting etcd server")
   359  
   360  			etcdCfg, err := config.NewEtcdEmbedConfig(cfg)
   361  			if err != nil {
   362  				logger.Fatal("unable to create etcd config", zap.Error(err))
   363  			}
   364  
   365  			e, err := embed.StartEtcd(etcdCfg)
   366  			if err != nil {
   367  				logger.Fatal("could not start embedded etcd", zap.Error(err))
   368  			}
   369  
   370  			if runOpts.EmbeddedKVCh != nil {
   371  				// Notify on embedded KV bootstrap chan if specified
   372  				runOpts.EmbeddedKVCh <- struct{}{}
   373  			}
   374  
   375  			defer e.Close()
   376  		}
   377  	}
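
         	// To summarize the branch above: with no seed nodes configured, this
         	// node expects an external (dedicated) etcd cluster; with seed nodes
         	// configured, etcd endpoints are derived from the initial cluster
         	// entries via config.InitialClusterEndpoints, and a node whose hostID
         	// appears in that list (config.IsSeedNode) also starts an embedded
         	// etcd server in-process.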
   378  
    379  	// By default use histogram timers for timers that are
    380  	// constructed, allowing the timer type to be picked by the
    381  	// caller using instrument.NewTimer(...).
   382  	timerOpts := instrument.NewHistogramTimerOptions(instrument.HistogramTimerOptions{})
   383  	timerOpts.StandardSampleRate = cfg.MetricsOrDefault().SampleRate()
   384  
   385  	var (
   386  		opts  = storage.NewOptions()
   387  		iOpts = opts.InstrumentOptions().
   388  			SetLogger(logger).
   389  			SetMetricsScope(scope).
   390  			SetTimerOptions(timerOpts).
   391  			SetTracer(tracer).
   392  			SetCustomBuildTags(runOpts.CustomBuildTags)
   393  	)
   394  	opts = opts.SetInstrumentOptions(iOpts)
   395  
   396  	// Only override the default MemoryTracker (which has default limits) if a custom limit has
   397  	// been set.
   398  	if cfg.Limits.MaxOutstandingRepairedBytes > 0 {
   399  		memTrackerOptions := storage.NewMemoryTrackerOptions(cfg.Limits.MaxOutstandingRepairedBytes)
   400  		memTracker := storage.NewMemoryTracker(memTrackerOptions)
   401  		opts = opts.SetMemoryTracker(memTracker)
   402  	}
   403  
   404  	opentracing.SetGlobalTracer(tracer)
   405  
   406  	// Set global index options.
   407  	if n := cfg.Index.RegexpDFALimitOrDefault(); n > 0 {
   408  		regexp.SetStateLimit(n)
   409  		levenshtein.SetStateLimit(n)
   410  		levenshtein2.SetStateLimit(n)
   411  	}
   412  	if n := cfg.Index.RegexpFSALimitOrDefault(); n > 0 {
   413  		regexp.SetDefaultLimit(n)
   414  	}
   415  
   416  	buildReporter := instrument.NewBuildReporter(iOpts)
   417  	if err := buildReporter.Start(); err != nil {
   418  		logger.Fatal("unable to start build reporter", zap.Error(err))
   419  	}
   420  	defer buildReporter.Stop()
   421  
   422  	mmapCfg := cfg.Filesystem.MmapConfigurationOrDefault()
   423  	shouldUseHugeTLB := mmapCfg.HugeTLB.Enabled
   424  	if shouldUseHugeTLB {
   425  		// Make sure the host supports HugeTLB before proceeding with it to prevent
   426  		// excessive log spam.
   427  		shouldUseHugeTLB, err = hostSupportsHugeTLB()
   428  		if err != nil {
   429  			logger.Fatal("could not determine if host supports HugeTLB", zap.Error(err))
   430  		}
   431  		if !shouldUseHugeTLB {
   432  			logger.Warn("host doesn't support HugeTLB, proceeding without it")
   433  		}
   434  	}
   435  
   436  	mmapReporter := newMmapReporter(scope)
   437  	mmapReporterCtx, cancel := context.WithCancel(context.Background())
   438  	defer cancel()
   439  	go mmapReporter.Run(mmapReporterCtx)
   440  	opts = opts.SetMmapReporter(mmapReporter)
   441  
   442  	runtimeOpts := m3dbruntime.NewOptions().
   443  		SetPersistRateLimitOptions(ratelimit.NewOptions().
   444  			SetLimitEnabled(true).
   445  			SetLimitMbps(cfg.Filesystem.ThroughputLimitMbpsOrDefault()).
   446  			SetLimitCheckEvery(cfg.Filesystem.ThroughputCheckEveryOrDefault())).
   447  		SetWriteNewSeriesAsync(cfg.WriteNewSeriesAsyncOrDefault()).
   448  		SetWriteNewSeriesBackoffDuration(cfg.WriteNewSeriesBackoffDurationOrDefault())
   449  
   450  	if lruCfg := cfg.Cache.SeriesConfiguration().LRU; lruCfg != nil {
   451  		runtimeOpts = runtimeOpts.SetMaxWiredBlocks(lruCfg.MaxBlocks)
   452  	}
   453  
   454  	// Setup query stats tracking.
   455  	var (
   456  		docsLimit           = limits.DefaultLookbackLimitOptions()
   457  		bytesReadLimit      = limits.DefaultLookbackLimitOptions()
   458  		diskSeriesReadLimit = limits.DefaultLookbackLimitOptions()
   459  		aggDocsLimit        = limits.DefaultLookbackLimitOptions()
   460  	)
   461  
   462  	if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedSeriesBlocks; limitConfig != nil {
   463  		docsLimit.Limit = limitConfig.Value
   464  		docsLimit.Lookback = limitConfig.Lookback
   465  	}
   466  	if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedSeriesDiskBytesRead; limitConfig != nil {
   467  		bytesReadLimit.Limit = limitConfig.Value
   468  		bytesReadLimit.Lookback = limitConfig.Lookback
   469  	}
   470  	if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedSeriesDiskRead; limitConfig != nil {
   471  		diskSeriesReadLimit.Limit = limitConfig.Value
   472  		diskSeriesReadLimit.Lookback = limitConfig.Lookback
   473  	}
   474  	if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedMetadata; limitConfig != nil {
   475  		aggDocsLimit.Limit = limitConfig.Value
   476  		aggDocsLimit.Lookback = limitConfig.Lookback
   477  	}
   478  	limitOpts := limits.NewOptions().
   479  		SetDocsLimitOpts(docsLimit).
   480  		SetBytesReadLimitOpts(bytesReadLimit).
   481  		SetDiskSeriesReadLimitOpts(diskSeriesReadLimit).
   482  		SetAggregateDocsLimitOpts(aggDocsLimit).
   483  		SetInstrumentOptions(iOpts)
   484  	if builder := opts.SourceLoggerBuilder(); builder != nil {
   485  		limitOpts = limitOpts.SetSourceLoggerBuilder(builder)
   486  	}
   487  	opts = opts.SetLimitsOptions(limitOpts)
   488  
   489  	seriesReadPermits := permits.NewLookbackLimitPermitsManager(
   490  		"disk-series-read",
   491  		diskSeriesReadLimit,
   492  		iOpts,
   493  		limitOpts.SourceLoggerBuilder(),
   494  	)
   495  
   496  	permitOptions := opts.PermitsOptions().SetSeriesReadPermitsManager(seriesReadPermits)
   497  	maxIdxConcurrency := int(math.Ceil(float64(runtime.GOMAXPROCS(0)) / 2))
   498  	if cfg.Index.MaxQueryIDsConcurrency > 0 {
   499  		maxIdxConcurrency = cfg.Index.MaxQueryIDsConcurrency
   500  		logger.Info("max index query IDs concurrency set",
   501  			zap.Int("maxIdxConcurrency", maxIdxConcurrency))
   502  	} else {
   503  		logger.Info("max index query IDs concurrency was not set, falling back to default value",
   504  			zap.Int("maxIdxConcurrency", maxIdxConcurrency))
   505  	}
   506  	maxWorkerTime := time.Second
   507  	if cfg.Index.MaxWorkerTime > 0 {
   508  		maxWorkerTime = cfg.Index.MaxWorkerTime
   509  		logger.Info("max index worker time set",
   510  			zap.Duration("maxWorkerTime", maxWorkerTime))
   511  	} else {
   512  		logger.Info("max index worker time was not set, falling back to default value",
   513  			zap.Duration("maxWorkerTime", maxWorkerTime))
   514  	}
   515  	opts = opts.SetPermitsOptions(permitOptions.SetIndexQueryPermitsManager(
   516  		permits.NewFixedPermitsManager(maxIdxConcurrency, int64(maxWorkerTime), iOpts)))
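
         	// Worked example of the defaults above: with GOMAXPROCS(0) == 8, index
         	// query concurrency defaults to ceil(8/2) = 4 fixed permits, with
         	// int64(maxWorkerTime) (1s unless overridden) passed as the permit size
         	// to permits.NewFixedPermitsManager.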
   517  
   518  	// Setup postings list cache.
   519  	var (
   520  		plCacheConfig  = cfg.Cache.PostingsListConfiguration()
   521  		plCacheSize    = plCacheConfig.SizeOrDefault()
   522  		plCacheOptions = index.PostingsListCacheOptions{
   523  			InstrumentOptions: opts.InstrumentOptions().
   524  				SetMetricsScope(scope.SubScope("postings-list-cache")),
   525  		}
   526  	)
   527  	segmentPostingsListCache, err := index.NewPostingsListCache(plCacheSize, plCacheOptions)
   528  	if err != nil {
   529  		logger.Fatal("could not construct segment postings list cache", zap.Error(err))
   530  	}
   531  
   532  	segmentStopReporting := segmentPostingsListCache.Start()
   533  	defer segmentStopReporting()
   534  
   535  	searchPostingsListCache, err := index.NewPostingsListCache(plCacheSize, plCacheOptions)
   536  	if err != nil {
    537  		logger.Fatal("could not construct search postings list cache", zap.Error(err))
   538  	}
   539  
   540  	searchStopReporting := searchPostingsListCache.Start()
   541  	defer searchStopReporting()
   542  
   543  	// Setup index regexp compilation cache.
   544  	m3ninxindex.SetRegexpCacheOptions(m3ninxindex.RegexpCacheOptions{
   545  		Size:  cfg.Cache.RegexpConfiguration().SizeOrDefault(),
   546  		Scope: iOpts.MetricsScope(),
   547  	})
   548  
   549  	if runOpts.Transform != nil {
   550  		opts = runOpts.Transform(opts)
   551  	}
   552  
   553  	queryLimits, err := limits.NewQueryLimits(opts.LimitsOptions())
   554  	if err != nil {
   555  		logger.Fatal("could not construct docs query limits from config", zap.Error(err))
   556  	}
   557  
   558  	queryLimits.Start()
   559  	defer queryLimits.Stop()
   560  	seriesReadPermits.Start()
   561  	defer seriesReadPermits.Stop()
   562  
   563  	// FOLLOWUP(prateek): remove this once we have the runtime options<->index wiring done
   564  	indexOpts := opts.IndexOptions()
   565  	insertMode := index.InsertSync
   566  
   567  	if cfg.WriteNewSeriesAsyncOrDefault() {
   568  		insertMode = index.InsertAsync
   569  	}
   570  	indexOpts = indexOpts.SetInsertMode(insertMode).
   571  		SetPostingsListCache(segmentPostingsListCache).
   572  		SetSearchPostingsListCache(searchPostingsListCache).
   573  		SetReadThroughSegmentOptions(index.ReadThroughSegmentOptions{
   574  			CacheRegexp:   plCacheConfig.CacheRegexpOrDefault(),
   575  			CacheTerms:    plCacheConfig.CacheTermsOrDefault(),
   576  			CacheSearches: plCacheConfig.CacheSearchOrDefault(),
   577  		}).
   578  		SetMmapReporter(mmapReporter).
   579  		SetQueryLimits(queryLimits)
   580  
   581  	opts = opts.SetIndexOptions(indexOpts)
   582  
   583  	if tick := cfg.Tick; tick != nil {
   584  		runtimeOpts = runtimeOpts.
   585  			SetTickSeriesBatchSize(tick.SeriesBatchSize).
   586  			SetTickPerSeriesSleepDuration(tick.PerSeriesSleepDuration).
   587  			SetTickMinimumInterval(tick.MinimumInterval)
   588  	}
   589  
   590  	runtimeOptsMgr := m3dbruntime.NewOptionsManager()
   591  	if err := runtimeOptsMgr.Update(runtimeOpts); err != nil {
   592  		logger.Fatal("could not set initial runtime options", zap.Error(err))
   593  	}
   594  	defer runtimeOptsMgr.Close()
   595  
   596  	opts = opts.SetRuntimeOptionsManager(runtimeOptsMgr)
   597  
   598  	policy, err := cfg.PoolingPolicyOrDefault()
   599  	if err != nil {
   600  		logger.Fatal("could not get pooling policy", zap.Error(err))
   601  	}
   602  
   603  	tagEncoderPool := serialize.NewTagEncoderPool(
   604  		serialize.NewTagEncoderOptions(),
   605  		poolOptions(
   606  			policy.TagEncoderPool,
   607  			scope.SubScope("tag-encoder-pool")))
   608  	tagEncoderPool.Init()
   609  	tagDecoderPool := serialize.NewTagDecoderPool(
   610  		serialize.NewTagDecoderOptions(serialize.TagDecoderOptionsConfig{}),
   611  		poolOptions(
   612  			policy.TagDecoderPool,
   613  			scope.SubScope("tag-decoder-pool")))
   614  	tagDecoderPool.Init()
   615  
   616  	// Pass nil for block.LeaseVerifier for now and it will be set after the
   617  	// db is constructed (since the db is required to construct a
   618  	// block.LeaseVerifier). Initialized here because it needs to be propagated
   619  	// to both the DB and the blockRetriever.
   620  	blockLeaseManager := block.NewLeaseManager(nil)
   621  	opts = opts.SetBlockLeaseManager(blockLeaseManager)
   622  	fsopts := fs.NewOptions().
   623  		SetClockOptions(opts.ClockOptions()).
   624  		SetInstrumentOptions(opts.InstrumentOptions().
   625  			SetMetricsScope(scope.SubScope("database.fs"))).
   626  		SetFilePathPrefix(cfg.Filesystem.FilePathPrefixOrDefault()).
   627  		SetNewFileMode(newFileMode).
   628  		SetNewDirectoryMode(newDirectoryMode).
   629  		SetWriterBufferSize(cfg.Filesystem.WriteBufferSizeOrDefault()).
   630  		SetDataReaderBufferSize(cfg.Filesystem.DataReadBufferSizeOrDefault()).
   631  		SetInfoReaderBufferSize(cfg.Filesystem.InfoReadBufferSizeOrDefault()).
   632  		SetSeekReaderBufferSize(cfg.Filesystem.SeekReadBufferSizeOrDefault()).
   633  		SetMmapEnableHugeTLB(shouldUseHugeTLB).
   634  		SetMmapHugeTLBThreshold(mmapCfg.HugeTLB.Threshold).
   635  		SetRuntimeOptionsManager(runtimeOptsMgr).
   636  		SetTagEncoderPool(tagEncoderPool).
   637  		SetTagDecoderPool(tagDecoderPool).
   638  		SetForceIndexSummariesMmapMemory(cfg.Filesystem.ForceIndexSummariesMmapMemoryOrDefault()).
   639  		SetForceBloomFilterMmapMemory(cfg.Filesystem.ForceBloomFilterMmapMemoryOrDefault()).
   640  		SetIndexBloomFilterFalsePositivePercent(cfg.Filesystem.BloomFilterFalsePositivePercentOrDefault()).
   641  		SetMmapReporter(mmapReporter)
   642  
   643  	var commitLogQueueSize int
   644  	cfgCommitLog := cfg.CommitLogOrDefault()
   645  	specified := cfgCommitLog.Queue.Size
   646  	switch cfgCommitLog.Queue.CalculationType {
   647  	case config.CalculationTypeFixed:
   648  		commitLogQueueSize = specified
   649  	case config.CalculationTypePerCPU:
   650  		commitLogQueueSize = specified * runtime.GOMAXPROCS(0)
   651  	default:
   652  		logger.Fatal("unknown commit log queue size type",
   653  			zap.Any("type", cfgCommitLog.Queue.CalculationType))
   654  	}
   655  
   656  	var commitLogQueueChannelSize int
   657  	if cfgCommitLog.QueueChannel != nil {
   658  		specified := cfgCommitLog.QueueChannel.Size
   659  		switch cfgCommitLog.Queue.CalculationType {
   660  		case config.CalculationTypeFixed:
   661  			commitLogQueueChannelSize = specified
   662  		case config.CalculationTypePerCPU:
   663  			commitLogQueueChannelSize = specified * runtime.GOMAXPROCS(0)
   664  		default:
   665  			logger.Fatal("unknown commit log queue channel size type",
   666  				zap.Any("type", cfgCommitLog.Queue.CalculationType))
   667  		}
   668  	} else {
   669  		commitLogQueueChannelSize = int(float64(commitLogQueueSize) / commitlog.MaximumQueueSizeQueueChannelSizeRatio)
   670  	}
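
         	// Worked example of the sizing above: with CalculationTypePerCPU, a
         	// configured size of 2048, and GOMAXPROCS(0) == 16, the backlog queue
         	// holds 2048 * 16 = 32768 entries; with no explicit QueueChannel config,
         	// the channel size then becomes
         	// 32768 / commitlog.MaximumQueueSizeQueueChannelSizeRatio.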
   671  
   672  	// Set the series cache policy.
   673  	seriesCachePolicy := cfg.Cache.SeriesConfiguration().Policy
   674  	opts = opts.SetSeriesCachePolicy(seriesCachePolicy)
   675  
   676  	// Apply pooling options.
   677  	poolingPolicy, err := cfg.PoolingPolicyOrDefault()
   678  	if err != nil {
   679  		logger.Fatal("could not get pooling policy", zap.Error(err))
   680  	}
   681  
   682  	opts = withEncodingAndPoolingOptions(cfg, logger, opts, poolingPolicy)
   683  	opts = opts.SetCommitLogOptions(opts.CommitLogOptions().
   684  		SetInstrumentOptions(opts.InstrumentOptions()).
   685  		SetFilesystemOptions(fsopts).
   686  		SetStrategy(commitlog.StrategyWriteBehind).
   687  		SetFlushSize(cfgCommitLog.FlushMaxBytes).
   688  		SetFlushInterval(cfgCommitLog.FlushEvery).
   689  		SetBacklogQueueSize(commitLogQueueSize).
   690  		SetBacklogQueueChannelSize(commitLogQueueChannelSize))
   691  
   692  	// Setup the block retriever
   693  	switch seriesCachePolicy {
   694  	case series.CacheAll:
    695  		// No options need to be set
   696  	default:
   697  		// All other caching strategies require retrieving series from disk
   698  		// to service a cache miss
   699  		retrieverOpts := fs.NewBlockRetrieverOptions().
   700  			SetBytesPool(opts.BytesPool()).
   701  			SetRetrieveRequestPool(opts.RetrieveRequestPool()).
   702  			SetIdentifierPool(opts.IdentifierPool()).
   703  			SetBlockLeaseManager(blockLeaseManager).
   704  			SetQueryLimits(queryLimits)
   705  		if blockRetrieveCfg := cfg.BlockRetrieve; blockRetrieveCfg != nil {
   706  			if v := blockRetrieveCfg.FetchConcurrency; v != nil {
   707  				retrieverOpts = retrieverOpts.SetFetchConcurrency(*v)
   708  			}
   709  			if v := blockRetrieveCfg.CacheBlocksOnRetrieve; v != nil {
   710  				retrieverOpts = retrieverOpts.SetCacheBlocksOnRetrieve(*v)
   711  			}
   712  		}
   713  		blockRetrieverMgr := block.NewDatabaseBlockRetrieverManager(
   714  			func(md namespace.Metadata, shardSet sharding.ShardSet) (block.DatabaseBlockRetriever, error) {
   715  				retriever, err := fs.NewBlockRetriever(retrieverOpts, fsopts)
   716  				if err != nil {
   717  					return nil, err
   718  				}
   719  				if err := retriever.Open(md, shardSet); err != nil {
   720  					return nil, err
   721  				}
   722  				return retriever, nil
   723  			})
   724  		opts = opts.SetDatabaseBlockRetrieverManager(blockRetrieverMgr)
   725  	}
   726  
   727  	// Set the persistence manager
   728  	pm, err := fs.NewPersistManager(fsopts)
   729  	if err != nil {
   730  		logger.Fatal("could not create persist manager", zap.Error(err))
   731  	}
   732  	opts = opts.SetPersistManager(pm)
   733  
   734  	// Set the index claims manager
   735  	icm, err := fs.NewIndexClaimsManager(fsopts)
   736  	if err != nil {
   737  		logger.Fatal("could not create index claims manager", zap.Error(err))
   738  	}
   739  	defer func() {
   740  		// Reset counter of index claims managers after server teardown.
   741  		fs.ResetIndexClaimsManagersUnsafe()
   742  	}()
   743  	opts = opts.SetIndexClaimsManager(icm)
   744  
   745  	if value := cfg.ForceColdWritesEnabled; value != nil {
   746  		// Allow forcing cold writes to be enabled by config.
   747  		opts = opts.SetForceColdWritesEnabled(*value)
   748  	}
   749  
   750  	forceColdWrites := opts.ForceColdWritesEnabled()
   751  	var envCfgResults environment.ConfigureResults
   752  	if len(envConfig.Statics) == 0 {
   753  		logger.Info("creating dynamic config service client with m3cluster")
   754  
   755  		envCfgResults, err = envConfig.Configure(environment.ConfigurationParameters{
   756  			InterruptedCh:          interruptOpts.InterruptedCh,
   757  			InstrumentOpts:         iOpts,
   758  			HashingSeed:            cfg.Hashing.Seed,
   759  			NewDirectoryMode:       newDirectoryMode,
   760  			ForceColdWritesEnabled: forceColdWrites,
   761  		})
   762  		if err != nil {
   763  			logger.Fatal("could not initialize dynamic config", zap.Error(err))
   764  		}
   765  	} else {
   766  		logger.Info("creating static config service client with m3cluster")
   767  
   768  		envCfgResults, err = envConfig.Configure(environment.ConfigurationParameters{
   769  			InterruptedCh:          interruptOpts.InterruptedCh,
   770  			InstrumentOpts:         iOpts,
   771  			HostID:                 hostID,
   772  			ForceColdWritesEnabled: forceColdWrites,
   773  		})
   774  		if err != nil {
   775  			logger.Fatal("could not initialize static config", zap.Error(err))
   776  		}
   777  	}
   778  
   779  	syncCfg, err := envCfgResults.SyncCluster()
   780  	if err != nil {
   781  		logger.Fatal("invalid cluster config", zap.Error(err))
   782  	}
   783  	if runOpts.ClusterClientCh != nil {
   784  		runOpts.ClusterClientCh <- syncCfg.ClusterClient
   785  	}
   786  	if runOpts.KVStoreCh != nil {
   787  		runOpts.KVStoreCh <- syncCfg.KVStore
   788  	}
   789  
   790  	opts = opts.SetNamespaceInitializer(syncCfg.NamespaceInitializer)
   791  
   792  	// Set tchannelthrift options.
   793  	ttopts := tchannelthrift.NewOptions().
   794  		SetClockOptions(opts.ClockOptions()).
   795  		SetInstrumentOptions(opts.InstrumentOptions()).
   796  		SetTopologyInitializer(syncCfg.TopologyInitializer).
   797  		SetIdentifierPool(opts.IdentifierPool()).
   798  		SetTagEncoderPool(tagEncoderPool).
   799  		SetCheckedBytesWrapperPool(opts.CheckedBytesWrapperPool()).
   800  		SetMaxOutstandingWriteRequests(cfg.Limits.MaxOutstandingWriteRequests).
   801  		SetMaxOutstandingReadRequests(cfg.Limits.MaxOutstandingReadRequests).
   802  		SetQueryLimits(queryLimits).
   803  		SetPermitsOptions(opts.PermitsOptions())
   804  
   805  	// Start servers before constructing the DB so orchestration tools can check health endpoints
   806  	// before topology is set.
   807  	var (
   808  		contextPool  = opts.ContextPool()
   809  		tchannelOpts = xtchannel.NewDefaultChannelOptions()
   810  		// Pass nil for the database argument because we haven't constructed it yet. We'll call
   811  		// SetDatabase() once we've initialized it.
   812  		service = ttnode.NewService(nil, ttopts)
   813  	)
   814  	if cfg.TChannel != nil {
   815  		tchannelOpts.MaxIdleTime = cfg.TChannel.MaxIdleTime
   816  		tchannelOpts.IdleCheckInterval = cfg.TChannel.IdleCheckInterval
   817  	}
   818  	tchanOpts := ttnode.NewOptions(tchannelOpts).
   819  		SetInstrumentOptions(opts.InstrumentOptions())
   820  	if fn := runOpts.StorageOptions.TChanChannelFn; fn != nil {
   821  		tchanOpts = tchanOpts.SetTChanChannelFn(fn)
   822  	}
   823  	if fn := runOpts.StorageOptions.TChanNodeServerFn; fn != nil {
   824  		tchanOpts = tchanOpts.SetTChanNodeServerFn(fn)
   825  	}
   826  
   827  	listenAddress := cfg.ListenAddressOrDefault()
   828  	tchannelthriftNodeClose, err := ttnode.NewServer(service,
   829  		listenAddress, contextPool, tchanOpts).ListenAndServe()
   830  	if err != nil {
   831  		logger.Fatal("could not open tchannelthrift interface",
   832  			zap.String("address", listenAddress), zap.Error(err))
   833  	}
   834  	defer tchannelthriftNodeClose()
   835  	logger.Info("node tchannelthrift: listening", zap.String("address", listenAddress))
   836  
   837  	httpListenAddress := cfg.HTTPNodeListenAddressOrDefault()
   838  	httpjsonNodeClose, err := hjnode.NewServer(service,
   839  		httpListenAddress, contextPool, nil).ListenAndServe()
   840  	if err != nil {
   841  		logger.Fatal("could not open httpjson interface",
   842  			zap.String("address", httpListenAddress), zap.Error(err))
   843  	}
   844  	defer httpjsonNodeClose()
   845  	logger.Info("node httpjson: listening", zap.String("address", httpListenAddress))
   846  
   847  	debugListenAddress := cfg.DebugListenAddressOrDefault()
   848  	if debugListenAddress != "" {
   849  		var debugWriter xdebug.ZipWriter
   850  		handlerOpts, err := placementhandler.NewHandlerOptions(syncCfg.ClusterClient,
   851  			placement.Configuration{}, nil, iOpts)
   852  		if err != nil {
   853  			logger.Warn("could not create handler options for debug writer", zap.Error(err))
   854  		} else {
   855  			envCfgCluster, err := envConfig.Services.SyncCluster()
   856  			if err != nil || envCfgCluster.Service == nil {
   857  				logger.Warn("could not get cluster config for debug writer",
   858  					zap.Error(err),
   859  					zap.Bool("envCfgClusterServiceIsNil", envCfgCluster.Service == nil))
   860  			} else {
   861  				debugWriter, err = extdebug.NewPlacementAndNamespaceZipWriterWithDefaultSources(
   862  					cpuProfileDuration,
   863  					syncCfg.ClusterClient,
   864  					handlerOpts,
   865  					[]handleroptions.ServiceNameAndDefaults{
   866  						{
   867  							ServiceName: handleroptions.M3DBServiceName,
   868  							Defaults: []handleroptions.ServiceOptionsDefault{
   869  								handleroptions.WithDefaultServiceEnvironment(envCfgCluster.Service.Env),
   870  								handleroptions.WithDefaultServiceZone(envCfgCluster.Service.Zone),
   871  							},
   872  						},
   873  					},
   874  					iOpts)
   875  				if err != nil {
   876  					logger.Error("unable to create debug writer", zap.Error(err))
   877  				}
   878  			}
   879  		}
   880  
   881  		debugClose := startDebugServer(debugWriter, logger, debugListenAddress, defaultServeMux)
   882  		defer debugClose()
   883  	}
   884  
   885  	topo, err := syncCfg.TopologyInitializer.Init()
   886  	if err != nil {
   887  		var interruptErr *xos.InterruptError
   888  		if errors.As(err, &interruptErr) {
   889  			logger.Warn("interrupt received. closing server", zap.Error(err))
   890  			// NB(nate): Have not attempted to start the actual database yet so
   891  			// it's safe for us to just return here.
   892  			return
   893  		}
   894  
   895  		logger.Fatal("could not initialize m3db topology", zap.Error(err))
   896  	}
   897  
   898  	var protoEnabled bool
   899  	if cfg.Proto != nil && cfg.Proto.Enabled {
   900  		protoEnabled = true
   901  	}
   902  	schemaRegistry := namespace.NewSchemaRegistry(protoEnabled, logger)
    903  	// For the convenience of m3db client application integration tests (where a
    904  	// local dbnode runs as a Docker container), we allow loading a user schema from a local file into the schema registry.
   905  	if protoEnabled {
   906  		for nsID, protoConfig := range cfg.Proto.SchemaRegistry {
   907  			dummyDeployID := "fromconfig"
   908  			if err := namespace.LoadSchemaRegistryFromFile(schemaRegistry, ident.StringID(nsID),
   909  				dummyDeployID,
   910  				protoConfig.SchemaFilePath, protoConfig.MessageName); err != nil {
   911  				logger.Fatal("could not load schema from configuration", zap.Error(err))
   912  			}
   913  		}
   914  	}
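
         	// A hypothetical sketch of the configuration consumed above (the YAML
         	// keys are assumptions; the Go fields are SchemaFilePath and MessageName):
         	//
         	//	proto:
         	//	  enabled: true
         	//	  schemaRegistry:
         	//	    "my-namespace":                    # hypothetical namespace ID
         	//	      schemaFilePath: /path/to/schema.proto
         	//	      messageName: mypackage.MyMessage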
   915  
   916  	origin := topology.NewHost(hostID, "")
   917  	m3dbClient, err := newAdminClient(
   918  		cfg.Client, opts.ClockOptions(), iOpts, tchannelOpts, syncCfg.TopologyInitializer,
   919  		runtimeOptsMgr, origin, protoEnabled, schemaRegistry,
   920  		syncCfg.KVStore, opts.ContextPool(), opts.BytesPool(), opts.IdentifierPool(),
   921  		logger, runOpts.CustomOptions)
   922  	if err != nil {
   923  		logger.Fatal("could not create m3db client", zap.Error(err))
   924  	}
   925  
   926  	if runOpts.ClientCh != nil {
   927  		runOpts.ClientCh <- m3dbClient
   928  	}
   929  
   930  	documentsBuilderAlloc := index.NewBootstrapResultDocumentsBuilderAllocator(
   931  		opts.IndexOptions())
   932  	rsOpts := result.NewOptions().
   933  		SetClockOptions(opts.ClockOptions()).
   934  		SetInstrumentOptions(opts.InstrumentOptions()).
   935  		SetDatabaseBlockOptions(opts.DatabaseBlockOptions()).
   936  		SetSeriesCachePolicy(opts.SeriesCachePolicy()).
   937  		SetIndexDocumentsBuilderAllocator(documentsBuilderAlloc)
   938  
   939  	var repairClients []client.AdminClient
   940  	if cfg.Repair != nil && cfg.Repair.Enabled {
   941  		repairClients = append(repairClients, m3dbClient)
   942  	}
   943  	if cfg.Replication != nil {
   944  		for _, cluster := range cfg.Replication.Clusters {
   945  			if !cluster.RepairEnabled {
   946  				continue
   947  			}
   948  
   949  			// Pass nil for the topology initializer because we want to create
   950  			// a new one for the cluster we wish to replicate from, not use the
   951  			// same one as the cluster this node belongs to.
   952  			var topologyInitializer topology.Initializer
    953  			// Guaranteed by config validation to be non-nil when repair is enabled.
   954  			clientCfg := *cluster.Client
   955  			clusterClient, err := newAdminClient(
   956  				clientCfg, opts.ClockOptions(), iOpts, tchannelOpts, topologyInitializer,
   957  				runtimeOptsMgr, origin, protoEnabled, schemaRegistry,
   958  				syncCfg.KVStore, opts.ContextPool(), opts.BytesPool(),
   959  				opts.IdentifierPool(), logger, runOpts.CustomOptions)
   960  			if err != nil {
   961  				logger.Fatal(
   962  					"unable to create client for replicated cluster",
   963  					zap.String("clusterName", cluster.Name), zap.Error(err))
   964  			}
   965  			repairClients = append(repairClients, clusterClient)
   966  		}
   967  	}
   968  	repairEnabled := len(repairClients) > 0
   969  	if repairEnabled {
   970  		repairOpts := opts.RepairOptions().
   971  			SetAdminClients(repairClients)
   972  
   973  		if repairCfg := cfg.Repair; repairCfg != nil {
   974  			repairOpts = repairOpts.
   975  				SetType(repairCfg.Type).
   976  				SetStrategy(repairCfg.Strategy).
   977  				SetForce(repairCfg.Force).
   978  				SetResultOptions(rsOpts).
   979  				SetDebugShadowComparisonsEnabled(cfg.Repair.DebugShadowComparisonsEnabled)
   980  			if cfg.Repair.Throttle > 0 {
   981  				repairOpts = repairOpts.SetRepairThrottle(cfg.Repair.Throttle)
   982  			}
   983  			if cfg.Repair.CheckInterval > 0 {
   984  				repairOpts = repairOpts.SetRepairCheckInterval(cfg.Repair.CheckInterval)
   985  			}
   986  			if cfg.Repair.Concurrency > 0 {
   987  				repairOpts = repairOpts.SetRepairShardConcurrency(cfg.Repair.Concurrency)
   988  			}
   989  
   990  			if cfg.Repair.DebugShadowComparisonsPercentage > 0 {
   991  				// Set conditionally to avoid stomping on the default value of 1.0.
   992  				repairOpts = repairOpts.SetDebugShadowComparisonsPercentage(cfg.Repair.DebugShadowComparisonsPercentage)
   993  			}
   994  		}
   995  
   996  		opts = opts.
   997  			SetRepairEnabled(true).
   998  			SetRepairOptions(repairOpts)
   999  	} else {
  1000  		opts = opts.SetRepairEnabled(false)
  1001  	}
  1002  
  1003  	// Set bootstrap options - We need to create a topology map provider from the
  1004  	// same topology that will be passed to the cluster so that when we make
  1005  	// bootstrapping decisions they are in sync with the clustered database
  1006  	// which is triggering the actual bootstraps. This way, when the clustered
  1007  	// database receives a topology update and decides to kick off a bootstrap,
   1008  	// the bootstrap process will receive a topology map that is at least as
  1009  	// recent as the one that triggered the bootstrap, if not newer.
  1010  	// See GitHub issue #1013 for more details.
  1011  	topoMapProvider := newTopoMapProvider(topo)
  1012  	bs, err := cfg.Bootstrap.New(
  1013  		rsOpts, opts, topoMapProvider, origin, m3dbClient,
  1014  	)
  1015  	if err != nil {
  1016  		logger.Fatal("could not create bootstrap process", zap.Error(err))
  1017  	}
  1018  	opts = opts.SetBootstrapProcessProvider(bs)
  1019  
  1020  	// Start the cluster services now that the M3DB client is available.
  1021  	clusterListenAddress := cfg.ClusterListenAddressOrDefault()
  1022  	tchannelthriftClusterClose, err := ttcluster.NewServer(m3dbClient,
  1023  		clusterListenAddress, contextPool, tchannelOpts).ListenAndServe()
  1024  	if err != nil {
  1025  		logger.Fatal("could not open tchannelthrift interface",
  1026  			zap.String("address", clusterListenAddress), zap.Error(err))
  1027  	}
  1028  	defer tchannelthriftClusterClose()
  1029  	logger.Info("cluster tchannelthrift: listening", zap.String("address", clusterListenAddress))
  1030  
  1031  	httpClusterListenAddress := cfg.HTTPClusterListenAddressOrDefault()
  1032  	httpjsonClusterClose, err := hjcluster.NewServer(m3dbClient,
  1033  		httpClusterListenAddress, contextPool, nil).ListenAndServe()
  1034  	if err != nil {
  1035  		logger.Fatal("could not open httpjson interface",
  1036  			zap.String("address", httpClusterListenAddress), zap.Error(err))
  1037  	}
  1038  	defer httpjsonClusterClose()
  1039  	logger.Info("cluster httpjson: listening", zap.String("address", httpClusterListenAddress))
  1040  
  1041  	// Initialize clustered database.
  1042  	clusterTopoWatch, err := topo.Watch()
  1043  	if err != nil {
  1044  		logger.Fatal("could not create cluster topology watch", zap.Error(err))
  1045  	}
  1046  
  1047  	opts = opts.SetSchemaRegistry(schemaRegistry).
  1048  		SetAdminClient(m3dbClient)
  1049  
  1050  	db, err := cluster.NewDatabase(hostID, topo, clusterTopoWatch, opts)
  1051  	if err != nil {
  1052  		logger.Fatal("could not construct database", zap.Error(err))
  1053  	}
  1054  
  1055  	// Now that the database has been created it can be set as the block lease verifier
  1056  	// on the block lease manager.
  1057  	leaseVerifier := storage.NewLeaseVerifier(db)
  1058  	blockLeaseManager.SetLeaseVerifier(leaseVerifier)
  1059  
  1060  	if err := db.Open(); err != nil {
  1061  		logger.Fatal("could not open database", zap.Error(err))
  1062  	}
  1063  
  1064  	// Now that we've initialized the database we can set it on the service.
  1065  	service.SetDatabase(db)
  1066  
  1067  	go func() {
  1068  		if runOpts.BootstrapCh != nil {
  1069  			// Notify on bootstrap chan if specified.
  1070  			defer func() {
  1071  				runOpts.BootstrapCh <- struct{}{}
  1072  			}()
  1073  		}
  1074  
  1075  		// Bootstrap asynchronously so we can handle interrupt.
  1076  		if err := db.Bootstrap(); err != nil {
  1077  			logger.Fatal("could not bootstrap database", zap.Error(err))
  1078  		}
  1079  		logger.Info("bootstrapped")
  1080  
  1081  		// Only set the write new series limit after bootstrapping
  1082  		kvWatchNewSeriesLimitPerShard(syncCfg.KVStore, logger, topo,
  1083  			runtimeOptsMgr, cfg.Limits.WriteNewSeriesPerSecond)
  1084  		kvWatchEncodersPerBlockLimit(syncCfg.KVStore, logger,
  1085  			runtimeOptsMgr, cfg.Limits.MaxEncodersPerBlock)
  1086  		kvWatchQueryLimit(syncCfg.KVStore, logger,
  1087  			queryLimits.FetchDocsLimit(),
  1088  			queryLimits.BytesReadLimit(),
   1089  			// For backwards compatibility as M3 moves toward permits instead of time-based limits,
   1090  			// the series-read path uses permits which are implemented with limits, so we still
   1091  			// support dynamic updates to this limit-based permit by passing the limit itself downstream.
  1092  			seriesReadPermits.Limit,
  1093  			queryLimits.AggregateDocsLimit(),
  1094  			limitOpts,
  1095  		)
  1096  	}()
  1097  
  1098  	// Stop our async watch and now block waiting for the interrupt.
  1099  	intWatchCancel()
  1100  	select {
  1101  	case <-interruptOpts.InterruptedCh:
  1102  		logger.Warn("interrupt already received. closing")
  1103  	default:
  1104  		xos.WaitForInterrupt(logger, interruptOpts)
  1105  	}
  1106  
  1107  	// Attempt graceful server close.
  1108  	closedCh := make(chan struct{})
  1109  	go func() {
  1110  		err := db.Terminate()
  1111  		if err != nil {
  1112  			logger.Error("close database error", zap.Error(err))
  1113  		}
  1114  		closedCh <- struct{}{}
  1115  	}()
  1116  
   1117  	// Wait for graceful close, or log and continue after the timeout.
  1118  	closeTimeout := serverGracefulCloseTimeout
  1119  	select {
  1120  	case <-closedCh:
  1121  		logger.Info("server closed")
  1122  	case <-time.After(closeTimeout):
  1123  		logger.Error("server closed after timeout", zap.Duration("timeout", closeTimeout))
  1124  	}
  1125  }
  1126  
  1127  func startDebugServer(
  1128  	debugWriter xdebug.ZipWriter,
  1129  	logger *zap.Logger,
  1130  	debugListenAddress string,
  1131  	mux *http.ServeMux,
  1132  ) func() {
  1133  	xdebug.RegisterPProfHandlers(mux)
  1134  	server := http.Server{Addr: debugListenAddress, Handler: mux}
  1135  
  1136  	if debugWriter != nil {
  1137  		if err := debugWriter.RegisterHandler(xdebug.DebugURL, mux); err != nil {
  1138  			logger.Error("unable to register debug writer endpoint", zap.Error(err))
  1139  		}
  1140  	}
  1141  
  1142  	go func() {
  1143  		if err := server.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
  1144  			logger.Error("debug server could not listen",
  1145  				zap.String("address", debugListenAddress), zap.Error(err))
  1146  		}
  1147  	}()
  1148  
  1149  	return func() {
  1150  		ctx, cancel := context.WithTimeout(context.Background(), debugServerGracefulCloseTimeout)
  1151  		defer cancel()
  1152  		if err := server.Shutdown(ctx); err != nil {
   1153  			logger.Warn("debug server failed to shut down gracefully")
  1154  		} else {
  1155  			logger.Info("debug server closed")
  1156  		}
  1157  	}
  1158  }
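
         // Assuming xdebug.RegisterPProfHandlers wires up the standard
         // net/http/pprof endpoints, the debug server above can be exercised with
         // the usual paths once listening, e.g. (hypothetical address):
         //
         //	curl http://localhost:9004/debug/pprof/goroutine?debug=1
         //	curl http://localhost:9004/debug/pprof/profile
         //
         // plus whatever bundle endpoint the debug writer registers at
         // xdebug.DebugURL.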
  1159  
  1160  func bgValidateProcessLimits(logger *zap.Logger) {
   1161  	// If unable to validate process limits with the current configuration,
   1162  	// do not run the background validator task.
  1163  	if canValidate, message := canValidateProcessLimits(); !canValidate {
  1164  		logger.Warn("cannot validate process limits: invalid configuration found",
  1165  			zap.String("message", message))
  1166  		return
  1167  	}
  1168  
  1169  	start := time.Now()
  1170  	t := time.NewTicker(bgProcessLimitInterval)
  1171  	defer t.Stop()
  1172  	for {
   1173  		// Only monitor for the first maxBgProcessLimitMonitorDuration of the process lifetime.
  1174  		if time.Since(start) > maxBgProcessLimitMonitorDuration {
  1175  			return
  1176  		}
  1177  
  1178  		err := validateProcessLimits()
  1179  		if err == nil {
  1180  			return
  1181  		}
  1182  
  1183  		logger.Warn("invalid configuration found, refer to linked documentation for more information",
  1184  			zap.String("url", xdocs.Path("operational_guide/kernel_configuration")),
  1185  			zap.Error(err),
  1186  		)
  1187  
  1188  		<-t.C
  1189  	}
  1190  }
  1191  
  1192  func kvWatchNewSeriesLimitPerShard(
  1193  	store kv.Store,
  1194  	logger *zap.Logger,
  1195  	topo topology.Topology,
  1196  	runtimeOptsMgr m3dbruntime.OptionsManager,
  1197  	defaultClusterNewSeriesLimit int,
  1198  ) {
  1199  	var initClusterLimit int
  1200  
  1201  	value, err := store.Get(kvconfig.ClusterNewSeriesInsertLimitKey)
  1202  	if err == nil {
  1203  		protoValue := &commonpb.Int64Proto{}
  1204  		err = value.Unmarshal(protoValue)
  1205  		if err == nil {
  1206  			initClusterLimit = int(protoValue.Value)
  1207  		}
  1208  	}
  1209  
  1210  	if err != nil {
   1211  		if !errors.Is(err, kv.ErrNotFound) {
  1212  			logger.Warn("error resolving cluster new series insert limit", zap.Error(err))
  1213  		}
  1214  		initClusterLimit = defaultClusterNewSeriesLimit
  1215  	}
  1216  
  1217  	err = setNewSeriesLimitPerShardOnChange(topo, runtimeOptsMgr, initClusterLimit)
  1218  	if err != nil {
  1219  		logger.Warn("unable to set cluster new series insert limit", zap.Error(err))
  1220  	}
  1221  
  1222  	watch, err := store.Watch(kvconfig.ClusterNewSeriesInsertLimitKey)
  1223  	if err != nil {
  1224  		logger.Error("could not watch cluster new series insert limit", zap.Error(err))
  1225  		return
  1226  	}
  1227  
  1228  	go func() {
  1229  		protoValue := &commonpb.Int64Proto{}
  1230  		for range watch.C() {
  1231  			value := defaultClusterNewSeriesLimit
  1232  			if newValue := watch.Get(); newValue != nil {
  1233  				if err := newValue.Unmarshal(protoValue); err != nil {
  1234  					logger.Warn("unable to parse new cluster new series insert limit", zap.Error(err))
  1235  					continue
  1236  				}
  1237  				value = int(protoValue.Value)
  1238  			}
  1239  
  1240  			err = setNewSeriesLimitPerShardOnChange(topo, runtimeOptsMgr, value)
  1241  			if err != nil {
  1242  				logger.Warn("unable to set cluster new series insert limit", zap.Error(err))
  1243  				continue
  1244  			}
  1245  		}
  1246  	}()
  1247  }
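
         // A sketch of the operator-side write this watch consumes, assuming the
         // m3 cluster kv.Store Set signature implied by the usage above (an
         // assumption, not defined in this file):
         //
         //	limit := &commonpb.Int64Proto{Value: 50000}
         //	if _, err := store.Set(kvconfig.ClusterNewSeriesInsertLimitKey, limit); err != nil {
         //		logger.Warn("unable to set new series insert limit", zap.Error(err))
         //	}
         //
         // Each watching node then recomputes its per-shard value via
         // setNewSeriesLimitPerShardOnChange.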
  1248  
  1249  func kvWatchEncodersPerBlockLimit(
  1250  	store kv.Store,
  1251  	logger *zap.Logger,
  1252  	runtimeOptsMgr m3dbruntime.OptionsManager,
  1253  	defaultEncodersPerBlockLimit int,
  1254  ) {
  1255  	var initEncoderLimit int
  1256  
  1257  	value, err := store.Get(kvconfig.EncodersPerBlockLimitKey)
  1258  	if err == nil {
  1259  		protoValue := &commonpb.Int64Proto{}
  1260  		err = value.Unmarshal(protoValue)
  1261  		if err == nil {
  1262  			initEncoderLimit = int(protoValue.Value)
  1263  		}
  1264  	}
  1265  
  1266  	if err != nil {
   1267  		if !errors.Is(err, kv.ErrNotFound) {
  1268  			logger.Warn("error resolving encoder per block limit", zap.Error(err))
  1269  		}
  1270  		initEncoderLimit = defaultEncodersPerBlockLimit
  1271  	}
  1272  
  1273  	err = setEncodersPerBlockLimitOnChange(runtimeOptsMgr, initEncoderLimit)
  1274  	if err != nil {
  1275  		logger.Warn("unable to set encoder per block limit", zap.Error(err))
  1276  	}
  1277  
  1278  	watch, err := store.Watch(kvconfig.EncodersPerBlockLimitKey)
  1279  	if err != nil {
  1280  		logger.Error("could not watch encoder per block limit", zap.Error(err))
  1281  		return
  1282  	}
  1283  
  1284  	go func() {
  1285  		protoValue := &commonpb.Int64Proto{}
  1286  		for range watch.C() {
  1287  			value := defaultEncodersPerBlockLimit
  1288  			if newValue := watch.Get(); newValue != nil {
  1289  				if err := newValue.Unmarshal(protoValue); err != nil {
  1290  					logger.Warn("unable to parse new encoder per block limit", zap.Error(err))
  1291  					continue
  1292  				}
  1293  				value = int(protoValue.Value)
  1294  			}
  1295  
  1296  			err = setEncodersPerBlockLimitOnChange(runtimeOptsMgr, value)
  1297  			if err != nil {
  1298  				logger.Warn("unable to set encoder per block limit", zap.Error(err))
  1299  				continue
  1300  			}
  1301  		}
  1302  	}()
  1303  }
  1304  
  1305  func kvWatchQueryLimit(
  1306  	store kv.Store,
  1307  	logger *zap.Logger,
  1308  	docsLimit limits.LookbackLimit,
  1309  	bytesReadLimit limits.LookbackLimit,
  1310  	diskSeriesReadLimit limits.LookbackLimit,
  1311  	aggregateDocsLimit limits.LookbackLimit,
  1312  	defaultOpts limits.Options,
  1313  ) {
  1314  	value, err := store.Get(kvconfig.QueryLimits)
  1315  	if err == nil {
  1316  		dynamicLimits := &kvpb.QueryLimits{}
  1317  		err = value.Unmarshal(dynamicLimits)
  1318  		if err == nil {
  1319  			updateQueryLimits(
  1320  				logger, docsLimit, bytesReadLimit, diskSeriesReadLimit,
  1321  				aggregateDocsLimit, dynamicLimits, defaultOpts)
  1322  		}
  1323  	} else if !errors.Is(err, kv.ErrNotFound) {
  1324  		logger.Warn("error resolving query limit", zap.Error(err))
  1325  	}
  1326  
  1327  	watch, err := store.Watch(kvconfig.QueryLimits)
  1328  	if err != nil {
  1329  		logger.Error("could not watch query limit", zap.Error(err))
  1330  		return
  1331  	}
  1332  
  1333  	go func() {
  1334  		dynamicLimits := &kvpb.QueryLimits{}
  1335  		for range watch.C() {
  1336  			if newValue := watch.Get(); newValue != nil {
  1337  				if err := newValue.Unmarshal(dynamicLimits); err != nil {
  1338  					logger.Warn("unable to parse new query limits", zap.Error(err))
  1339  					continue
  1340  				}
  1341  				updateQueryLimits(
  1342  					logger, docsLimit, bytesReadLimit, diskSeriesReadLimit,
  1343  					aggregateDocsLimit, dynamicLimits, defaultOpts)
  1344  			}
  1345  		}
  1346  	}()
  1347  }
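
// exampleSetQueryLimits is an illustrative sketch, not part of the original
// file: the shape of a dynamic override that the watch above reacts to. The
// numbers are hypothetical, and the availability of kv.Store.Set to the
// operator tooling doing the write is an assumption; fields left nil fall
// back to the config-based limits in updateQueryLimits below.
func exampleSetQueryLimits(store kv.Store) error {
	_, err := store.Set(kvconfig.QueryLimits, &kvpb.QueryLimits{
		MaxRecentlyQueriedSeriesBlocks: &kvpb.QueryLimit{
			Limit:           50000,
			LookbackSeconds: 15,
		},
	})
	return err
}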
  1348  
  1349  func updateQueryLimits(
  1350  	logger *zap.Logger,
  1351  	docsLimit limits.LookbackLimit,
  1352  	bytesReadLimit limits.LookbackLimit,
  1353  	diskSeriesReadLimit limits.LookbackLimit,
  1354  	aggregateDocsLimit limits.LookbackLimit,
  1355  	dynamicOpts *kvpb.QueryLimits,
  1356  	configOpts limits.Options,
  1357  ) {
  1358  	var (
  1359  		// Default to the config-based limits if unset in dynamic limits.
  1360  		// Otherwise, use the dynamic limit.
  1361  		docsLimitOpts           = configOpts.DocsLimitOpts()
  1362  		bytesReadLimitOpts      = configOpts.BytesReadLimitOpts()
  1363  		diskSeriesReadLimitOpts = configOpts.DiskSeriesReadLimitOpts()
  1364  		aggDocsLimitOpts        = configOpts.AggregateDocsLimitOpts()
  1365  	)
  1366  	if dynamicOpts != nil {
  1367  		if dynamicOpts.MaxRecentlyQueriedSeriesBlocks != nil {
  1368  			docsLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesBlocks)
  1369  		}
  1370  		if dynamicOpts.MaxRecentlyQueriedSeriesDiskBytesRead != nil {
  1371  			bytesReadLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesDiskBytesRead)
  1372  		}
  1373  		if dynamicOpts.MaxRecentlyQueriedSeriesDiskRead != nil {
  1374  			diskSeriesReadLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesDiskRead)
  1375  		}
  1376  		if dynamicOpts.MaxRecentlyQueriedMetadataRead != nil {
  1377  			aggDocsLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedMetadataRead)
  1378  		}
  1379  	}
  1380  
  1381  	if err := updateQueryLimit(docsLimit, docsLimitOpts); err != nil {
  1382  		logger.Error("error updating docs limit", zap.Error(err))
  1383  	}
  1384  
  1385  	if err := updateQueryLimit(bytesReadLimit, bytesReadLimitOpts); err != nil {
  1386  		logger.Error("error updating bytes read limit", zap.Error(err))
  1387  	}
  1388  
  1389  	if err := updateQueryLimit(diskSeriesReadLimit, diskSeriesReadLimitOpts); err != nil {
  1390  		logger.Error("error updating series read limit", zap.Error(err))
  1391  	}
  1392  
  1393  	if err := updateQueryLimit(aggregateDocsLimit, aggDocsLimitOpts); err != nil {
  1394  		logger.Error("error updating metadata read limit", zap.Error(err))
  1395  	}
  1396  }
  1397  
  1398  func updateQueryLimit(
  1399  	limit limits.LookbackLimit,
  1400  	newOpts limits.LookbackLimitOptions,
  1401  ) error {
  1402  	old := limit.Options()
  1403  	if old.Equals(newOpts) {
  1404  		return nil
  1405  	}
  1406  
  1407  	return limit.Update(newOpts)
  1408  }
  1409  
  1410  func dynamicLimitToLimitOpts(dynamicLimit *kvpb.QueryLimit) limits.LookbackLimitOptions {
  1411  	return limits.LookbackLimitOptions{
  1412  		Limit:         dynamicLimit.Limit,
  1413  		Lookback:      time.Duration(dynamicLimit.LookbackSeconds) * time.Second,
  1414  		ForceExceeded: dynamicLimit.ForceExceeded,
  1415  		ForceWaited:   dynamicLimit.ForceWaited,
  1416  	}
  1417  }
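
// For example (illustrative values, not from the original source): a dynamic
// limit of {Limit: 10000, LookbackSeconds: 15} converts to lookback options
// permitting at most 10000 units per rolling 15*time.Second window.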
  1418  
  1419  func kvWatchClientConsistencyLevels(
  1420  	store kv.Store,
  1421  	logger *zap.Logger,
  1422  	clientOpts client.AdminOptions,
  1423  	runtimeOptsMgr m3dbruntime.OptionsManager,
  1424  ) {
  1425  	setReadConsistencyLevel := func(
  1426  		v string,
  1427  		applyFn func(topology.ReadConsistencyLevel, m3dbruntime.Options) m3dbruntime.Options,
  1428  	) error {
  1429  		for _, level := range topology.ValidReadConsistencyLevels() {
  1430  			if level.String() == v {
  1431  				runtimeOpts := applyFn(level, runtimeOptsMgr.Get())
  1432  				return runtimeOptsMgr.Update(runtimeOpts)
  1433  			}
  1434  		}
  1435  		return fmt.Errorf("invalid read consistency level set: %s", v)
  1436  	}
  1437  
  1438  	setConsistencyLevel := func(
  1439  		v string,
  1440  		applyFn func(topology.ConsistencyLevel, m3dbruntime.Options) m3dbruntime.Options,
  1441  	) error {
  1442  		for _, level := range topology.ValidConsistencyLevels() {
  1443  			if level.String() == v {
  1444  				runtimeOpts := applyFn(level, runtimeOptsMgr.Get())
  1445  				return runtimeOptsMgr.Update(runtimeOpts)
  1446  			}
  1447  		}
  1448  		return fmt.Errorf("invalid consistency level set: %s", v)
  1449  	}
  1450  
  1451  	kvWatchStringValue(store, logger,
  1452  		kvconfig.ClientBootstrapConsistencyLevel,
  1453  		func(value string) error {
  1454  			return setReadConsistencyLevel(value,
  1455  				func(level topology.ReadConsistencyLevel, opts m3dbruntime.Options) m3dbruntime.Options {
  1456  					return opts.SetClientBootstrapConsistencyLevel(level)
  1457  				})
  1458  		},
  1459  		func() error {
  1460  			return runtimeOptsMgr.Update(runtimeOptsMgr.Get().
  1461  				SetClientBootstrapConsistencyLevel(clientOpts.BootstrapConsistencyLevel()))
  1462  		})
  1463  
  1464  	kvWatchStringValue(store, logger,
  1465  		kvconfig.ClientReadConsistencyLevel,
  1466  		func(value string) error {
  1467  			return setReadConsistencyLevel(value,
  1468  				func(level topology.ReadConsistencyLevel, opts m3dbruntime.Options) m3dbruntime.Options {
  1469  					return opts.SetClientReadConsistencyLevel(level)
  1470  				})
  1471  		},
  1472  		func() error {
  1473  			return runtimeOptsMgr.Update(runtimeOptsMgr.Get().
  1474  				SetClientReadConsistencyLevel(clientOpts.ReadConsistencyLevel()))
  1475  		})
  1476  
  1477  	kvWatchStringValue(store, logger,
  1478  		kvconfig.ClientWriteConsistencyLevel,
  1479  		func(value string) error {
  1480  			return setConsistencyLevel(value,
  1481  				func(level topology.ConsistencyLevel, opts m3dbruntime.Options) m3dbruntime.Options {
  1482  					return opts.SetClientWriteConsistencyLevel(level)
  1483  				})
  1484  		},
  1485  		func() error {
  1486  			return runtimeOptsMgr.Update(runtimeOptsMgr.Get().
  1487  				SetClientWriteConsistencyLevel(clientOpts.WriteConsistencyLevel()))
  1488  		})
  1489  }
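
// exampleSetWriteConsistency is an illustrative sketch, not part of the
// original file: an operator-style KV write that the watch above would pick
// up. The level string must match one of topology.ValidConsistencyLevels();
// the "majority" literal and the use of kv.Store.Set are assumptions.
func exampleSetWriteConsistency(store kv.Store) error {
	_, err := store.Set(kvconfig.ClientWriteConsistencyLevel,
		&commonpb.StringProto{Value: "majority"})
	return err
}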
  1490  
  1491  func kvWatchStringValue(
  1492  	store kv.Store,
  1493  	logger *zap.Logger,
  1494  	key string,
  1495  	onValue func(value string) error,
  1496  	onDelete func() error,
  1497  ) {
  1498  	protoValue := &commonpb.StringProto{}
  1499  
  1500  	// Eagerly resolve and apply the current value first so it doesn't
  1501  	// flap if the watch for an existing value fires late rather than immediately.
  1502  	value, err := store.Get(key)
  1503  	if err != nil && !errors.Is(err, kv.ErrNotFound) {
  1504  		logger.Error("could not resolve KV", zap.String("key", key), zap.Error(err))
  1505  	}
  1506  	if err == nil {
  1507  		if err := value.Unmarshal(protoValue); err != nil {
  1508  			logger.Error("could not unmarshal KV key", zap.String("key", key), zap.Error(err))
  1509  		} else if err := onValue(protoValue.Value); err != nil {
  1510  			logger.Error("could not process value of KV", zap.String("key", key), zap.Error(err))
  1511  		} else {
  1512  			logger.Info("set KV key", zap.String("key", key), zap.Any("value", protoValue.Value))
  1513  		}
  1514  	}
  1515  
  1516  	watch, err := store.Watch(key)
  1517  	if err != nil {
  1518  		logger.Error("could not watch KV key", zap.String("key", key), zap.Error(err))
  1519  		return
  1520  	}
  1521  
  1522  	go func() {
  1523  		for range watch.C() {
  1524  			newValue := watch.Get()
  1525  			if newValue == nil {
  1526  				if err := onDelete(); err != nil {
  1527  					logger.Warn("could not set default for KV key", zap.String("key", key), zap.Error(err))
  1528  				}
  1529  				continue
  1530  			}
  1531  
  1532  			err := newValue.Unmarshal(protoValue)
  1533  			if err != nil {
  1534  				logger.Warn("could not unmarshal KV key", zap.String("key", key), zap.Error(err))
  1535  				continue
  1536  			}
  1537  			if err := onValue(protoValue.Value); err != nil {
  1538  				logger.Warn("could not process change for KV key", zap.String("key", key), zap.Error(err))
  1539  				continue
  1540  			}
  1541  			logger.Info("set KV key", zap.String("key", key), zap.Any("value", protoValue.Value))
  1542  		}
  1543  	}()
  1544  }
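
// exampleWatchStringKey is an illustrative sketch, not part of the original
// file: the typical wiring for kvWatchStringValue, applying updates on
// change and restoring a default when the key is deleted. The key name,
// default, and apply logic are hypothetical.
func exampleWatchStringKey(store kv.Store, logger *zap.Logger) {
	const defaultMode = "disabled"
	apply := func(mode string) error {
		logger.Info("applying mode", zap.String("mode", mode))
		return nil
	}
	kvWatchStringValue(store, logger,
		"example/mode",
		apply,
		func() error { return apply(defaultMode) })
}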
  1545  
  1546  func setNewSeriesLimitPerShardOnChange(
  1547  	topo topology.Topology,
  1548  	runtimeOptsMgr m3dbruntime.OptionsManager,
  1549  	clusterLimit int,
  1550  ) error {
  1551  	perPlacedShardLimit := clusterLimitToPlacedShardLimit(topo, clusterLimit)
  1552  	runtimeOpts := runtimeOptsMgr.Get()
  1553  	if runtimeOpts.WriteNewSeriesLimitPerShardPerSecond() == perPlacedShardLimit {
  1554  		// Not changed, no need to set the value and trigger a runtime options update
  1555  		return nil
  1556  	}
  1557  
  1558  	newRuntimeOpts := runtimeOpts.
  1559  		SetWriteNewSeriesLimitPerShardPerSecond(perPlacedShardLimit)
  1560  	return runtimeOptsMgr.Update(newRuntimeOpts)
  1561  }
  1562  
  1563  func clusterLimitToPlacedShardLimit(topo topology.Topology, clusterLimit int) int {
  1564  	if clusterLimit < 1 {
  1565  		return 0
  1566  	}
  1567  	topoMap := topo.Get()
  1568  	numShards := len(topoMap.ShardSet().AllIDs())
  1569  	numPlacedShards := numShards * topoMap.Replicas()
  1570  	if numPlacedShards < 1 {
  1571  		return 0
  1572  	}
  1573  	nodeLimit := int(math.Ceil(
  1574  		float64(clusterLimit) / float64(numPlacedShards)))
  1575  	return nodeLimit
  1576  }
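
// Worked example (illustrative numbers, not from the original source): with
// a cluster-wide limit of 1,000,000 new series/sec, 256 shards, and a
// replication factor of 3, there are 256*3 = 768 placed shards, so each
// placed shard is allowed ceil(1000000/768) = 1303 new series/sec.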
  1577  
  1578  func setEncodersPerBlockLimitOnChange(
  1579  	runtimeOptsMgr m3dbruntime.OptionsManager,
  1580  	encoderLimit int,
  1581  ) error {
  1582  	runtimeOpts := runtimeOptsMgr.Get()
  1583  	if runtimeOpts.EncodersPerBlockLimit() == encoderLimit {
  1584  		// Not changed, no need to set the value and trigger a runtime options update
  1585  		return nil
  1586  	}
  1587  
  1588  	newRuntimeOpts := runtimeOpts.
  1589  		SetEncodersPerBlockLimit(encoderLimit)
  1590  	return runtimeOptsMgr.Update(newRuntimeOpts)
  1591  }
  1592  
  1593  func withEncodingAndPoolingOptions(
  1594  	cfg config.DBConfiguration,
  1595  	logger *zap.Logger,
  1596  	opts storage.Options,
  1597  	policy config.PoolingPolicy,
  1598  ) storage.Options {
  1599  	iOpts := opts.InstrumentOptions()
  1600  	scope := opts.InstrumentOptions().MetricsScope()
  1601  
  1602  	// Set the byte slice capacities for the thrift pooling.
  1603  	thriftBytesAllocSizes := policy.ThriftBytesPoolAllocSizesOrDefault()
  1604  	logger.Info("set thrift bytes pool slice sizes",
  1605  		zap.Ints("sizes", thriftBytesAllocSizes))
  1606  	apachethrift.SetMaxBytesPoolAlloc(thriftBytesAllocSizes...)
  1607  
  1608  	bytesPoolOpts := pool.NewObjectPoolOptions().
  1609  		SetInstrumentOptions(iOpts.SetMetricsScope(scope.SubScope("bytes-pool")))
  1610  	checkedBytesPoolOpts := bytesPoolOpts.
  1611  		SetInstrumentOptions(iOpts.SetMetricsScope(scope.SubScope("checked-bytes-pool")))
  1612  
  1613  	buckets := make([]pool.Bucket, len(policy.BytesPool.Buckets))
  1614  	for i, bucket := range policy.BytesPool.Buckets {
  1615  		var b pool.Bucket
  1616  		b.Capacity = bucket.CapacityOrDefault()
  1617  		b.Count = bucket.SizeOrDefault()
  1618  		b.Options = bytesPoolOpts.
  1619  			SetRefillLowWatermark(bucket.RefillLowWaterMarkOrDefault()).
  1620  			SetRefillHighWatermark(bucket.RefillHighWaterMarkOrDefault())
  1621  		buckets[i] = b
  1622  
  1623  		logger.Info("bytes pool configured",
  1624  			zap.Int("capacity", bucket.CapacityOrDefault()),
  1625  			zap.Int("size", int(bucket.SizeOrDefault())),
  1626  			zap.Float64("refillLowWaterMark", bucket.RefillLowWaterMarkOrDefault()),
  1627  			zap.Float64("refillHighWaterMark", bucket.RefillHighWaterMarkOrDefault()))
  1628  	}
  1629  
  1630  	var bytesPool pool.CheckedBytesPool
  1631  	switch policy.TypeOrDefault() {
  1632  	case config.SimplePooling:
  1633  		bytesPool = pool.NewCheckedBytesPool(
  1634  			buckets,
  1635  			checkedBytesPoolOpts,
  1636  			func(s []pool.Bucket) pool.BytesPool {
  1637  				return pool.NewBytesPool(s, bytesPoolOpts)
  1638  			})
  1639  	default:
  1640  		logger.Fatal("unrecognized pooling type", zap.Any("type", policy.Type))
  1641  	}
  1642  
  1643  	{
  1644  		// Scope `l` to this block so it doesn't pollute the rest of the function
  1645  		l := logger
  1646  		if t := policy.Type; t != nil {
  1647  			l = l.With(zap.String("policy", string(*t)))
  1648  		}
  1649  
  1650  		l.Info("bytes pool init start")
  1651  		bytesPool.Init()
  1652  		l.Info("bytes pool init end")
  1653  	}
  1654  
  1655  	segmentReaderPool := xio.NewSegmentReaderPool(
  1656  		poolOptions(
  1657  			policy.SegmentReaderPool,
  1658  			scope.SubScope("segment-reader-pool")))
  1659  	segmentReaderPool.Init()
  1660  
  1661  	encoderPool := encoding.NewEncoderPool(
  1662  		poolOptions(
  1663  			policy.EncoderPool,
  1664  			scope.SubScope("encoder-pool")))
  1665  
  1666  	closersPoolOpts := poolOptions(
  1667  		policy.ClosersPool,
  1668  		scope.SubScope("closers-pool"))
  1669  
  1670  	contextPoolOpts := poolOptions(
  1671  		policy.ContextPool,
  1672  		scope.SubScope("context-pool"))
  1673  
  1674  	contextPool := xcontext.NewPool(xcontext.NewOptions().
  1675  		SetContextPoolOptions(contextPoolOpts).
  1676  		SetFinalizerPoolOptions(closersPoolOpts))
  1677  
  1678  	iteratorPool := encoding.NewReaderIteratorPool(
  1679  		poolOptions(
  1680  			policy.IteratorPool,
  1681  			scope.SubScope("iterator-pool")))
  1682  
  1683  	multiIteratorPool := encoding.NewMultiReaderIteratorPool(
  1684  		poolOptions(
  1685  			policy.IteratorPool,
  1686  			scope.SubScope("multi-iterator-pool")))
  1687  
  1688  	writeBatchPoolInitialBatchSize := 0
  1689  	if policy.WriteBatchPool.InitialBatchSize != nil {
  1690  		// Use config value if available.
  1691  		writeBatchPoolInitialBatchSize = *policy.WriteBatchPool.InitialBatchSize
  1692  	}
  1693  
  1694  	var writeBatchPoolMaxBatchSize *int
  1695  	if policy.WriteBatchPool.MaxBatchSize != nil {
  1696  		writeBatchPoolMaxBatchSize = policy.WriteBatchPool.MaxBatchSize
  1697  	}
  1698  
  1699  	var writeBatchPoolSize int
  1700  	if policy.WriteBatchPool.Size != nil {
  1701  		writeBatchPoolSize = *policy.WriteBatchPool.Size
  1702  	} else {
  1703  		// If no value is set, calculate a reasonable value based on the
  1704  		// commit log queue size. We base it on the commit log queue size
  1705  		// because we want to be able to buffer at least one full commit log
  1706  		// queue's worth of writes without allocating, since these objects are
  1707  		// very expensive to allocate.
  1708  		commitlogQueueSize := opts.CommitLogOptions().BacklogQueueSize()
  1709  		expectedBatchSize := writeBatchPoolInitialBatchSize
  1710  		if expectedBatchSize == 0 {
  1711  			expectedBatchSize = client.DefaultWriteBatchSize
  1712  		}
  1713  		writeBatchPoolSize = commitlogQueueSize / expectedBatchSize
  1714  	}
  1715  
  1716  	writeBatchPoolOpts := pool.NewObjectPoolOptions()
  1717  	writeBatchPoolOpts = writeBatchPoolOpts.
  1718  		SetSize(writeBatchPoolSize).
  1719  		// Set watermarks to zero because this pool is sized to be as large as we
  1720  		// ever need it to be, so background allocations are usually wasteful.
  1721  		SetRefillLowWatermark(0.0).
  1722  		SetRefillHighWatermark(0.0).
  1723  		SetInstrumentOptions(
  1724  			writeBatchPoolOpts.
  1725  				InstrumentOptions().
  1726  				SetMetricsScope(scope.SubScope("write-batch-pool")))
  1727  
  1728  	writeBatchPool := writes.NewWriteBatchPool(
  1729  		writeBatchPoolOpts,
  1730  		writeBatchPoolInitialBatchSize,
  1731  		writeBatchPoolMaxBatchSize)
  1732  
  1733  	tagPoolPolicy := policy.TagsPool
  1734  	identifierPool := ident.NewPool(bytesPool, ident.PoolOptions{
  1735  		IDPoolOptions: poolOptions(
  1736  			policy.IdentifierPool, scope.SubScope("identifier-pool")),
  1737  		TagsPoolOptions: maxCapacityPoolOptions(tagPoolPolicy, scope.SubScope("tags-pool")),
  1738  		TagsCapacity:    tagPoolPolicy.CapacityOrDefault(),
  1739  		TagsMaxCapacity: tagPoolPolicy.MaxCapacityOrDefault(),
  1740  		TagsIteratorPoolOptions: poolOptions(
  1741  			policy.TagsIteratorPool,
  1742  			scope.SubScope("tags-iterator-pool")),
  1743  	})
  1744  
  1745  	fetchBlockMetadataResultsPoolPolicy := policy.FetchBlockMetadataResultsPool
  1746  	fetchBlockMetadataResultsPool := block.NewFetchBlockMetadataResultsPool(
  1747  		capacityPoolOptions(
  1748  			fetchBlockMetadataResultsPoolPolicy,
  1749  			scope.SubScope("fetch-block-metadata-results-pool")),
  1750  		fetchBlockMetadataResultsPoolPolicy.CapacityOrDefault())
  1751  
  1752  	fetchBlocksMetadataResultsPoolPolicy := policy.FetchBlocksMetadataResultsPool
  1753  	fetchBlocksMetadataResultsPool := block.NewFetchBlocksMetadataResultsPool(
  1754  		capacityPoolOptions(
  1755  			fetchBlocksMetadataResultsPoolPolicy,
  1756  			scope.SubScope("fetch-blocks-metadata-results-pool")),
  1757  		fetchBlocksMetadataResultsPoolPolicy.CapacityOrDefault())
  1758  
  1759  	bytesWrapperPoolOpts := poolOptions(
  1760  		policy.CheckedBytesWrapperPool,
  1761  		scope.SubScope("checked-bytes-wrapper-pool"))
  1762  	bytesWrapperPool := xpool.NewCheckedBytesWrapperPool(
  1763  		bytesWrapperPoolOpts)
  1764  	bytesWrapperPool.Init()
  1765  
  1766  	encodingOpts := encoding.NewOptions().
  1767  		SetEncoderPool(encoderPool).
  1768  		SetReaderIteratorPool(iteratorPool).
  1769  		SetBytesPool(bytesPool).
  1770  		SetSegmentReaderPool(segmentReaderPool).
  1771  		SetCheckedBytesWrapperPool(bytesWrapperPool).
  1772  		SetMetrics(encoding.NewMetrics(scope))
  1773  
  1774  	encoderPool.Init(func() encoding.Encoder {
  1775  		if cfg.Proto != nil && cfg.Proto.Enabled {
  1776  			enc := proto.NewEncoder(0, encodingOpts)
  1777  			return enc
  1778  		}
  1779  
  1780  		return m3tsz.NewEncoder(0, nil, m3tsz.DefaultIntOptimizationEnabled, encodingOpts)
  1781  	})
  1782  
  1783  	iteratorPool.Init(func(r xio.Reader64, descr namespace.SchemaDescr) encoding.ReaderIterator {
  1784  		if cfg.Proto != nil && cfg.Proto.Enabled {
  1785  			return proto.NewIterator(r, descr, encodingOpts)
  1786  		}
  1787  		return m3tsz.NewReaderIterator(r, m3tsz.DefaultIntOptimizationEnabled, encodingOpts)
  1788  	})
  1789  
  1790  	multiIteratorPool.Init(func(r xio.Reader64, descr namespace.SchemaDescr) encoding.ReaderIterator {
  1791  		iter := iteratorPool.Get()
  1792  		iter.Reset(r, descr)
  1793  		return iter
  1794  	})
  1795  
  1796  	writeBatchPool.Init()
  1797  
  1798  	bucketPool := series.NewBufferBucketPool(
  1799  		poolOptions(policy.BufferBucketPool, scope.SubScope("buffer-bucket-pool")))
  1800  	bucketVersionsPool := series.NewBufferBucketVersionsPool(
  1801  		poolOptions(policy.BufferBucketVersionsPool, scope.SubScope("buffer-bucket-versions-pool")))
  1802  
  1803  	retrieveRequestPool := fs.NewRetrieveRequestPool(segmentReaderPool,
  1804  		poolOptions(policy.RetrieveRequestPool, scope.SubScope("retrieve-request-pool")))
  1805  	retrieveRequestPool.Init()
  1806  
  1807  	opts = opts.
  1808  		SetBytesPool(bytesPool).
  1809  		SetContextPool(contextPool).
  1810  		SetEncoderPool(encoderPool).
  1811  		SetReaderIteratorPool(iteratorPool).
  1812  		SetMultiReaderIteratorPool(multiIteratorPool).
  1813  		SetIdentifierPool(identifierPool).
  1814  		SetFetchBlockMetadataResultsPool(fetchBlockMetadataResultsPool).
  1815  		SetFetchBlocksMetadataResultsPool(fetchBlocksMetadataResultsPool).
  1816  		SetWriteBatchPool(writeBatchPool).
  1817  		SetBufferBucketPool(bucketPool).
  1818  		SetBufferBucketVersionsPool(bucketVersionsPool).
  1819  		SetRetrieveRequestPool(retrieveRequestPool).
  1820  		SetCheckedBytesWrapperPool(bytesWrapperPool)
  1821  
  1822  	blockOpts := opts.DatabaseBlockOptions().
  1823  		SetDatabaseBlockAllocSize(policy.BlockAllocSizeOrDefault()).
  1824  		SetContextPool(contextPool).
  1825  		SetEncoderPool(encoderPool).
  1826  		SetReaderIteratorPool(iteratorPool).
  1827  		SetMultiReaderIteratorPool(multiIteratorPool).
  1828  		SetSegmentReaderPool(segmentReaderPool).
  1829  		SetBytesPool(bytesPool)
  1830  
  1831  	if opts.SeriesCachePolicy() == series.CacheLRU {
  1832  		var (
  1833  			runtimeOpts   = opts.RuntimeOptionsManager()
  1834  			wiredListOpts = block.WiredListOptions{
  1835  				RuntimeOptionsManager: runtimeOpts,
  1836  				InstrumentOptions:     iOpts,
  1837  				ClockOptions:          opts.ClockOptions(),
  1838  			}
  1839  			lruCfg = cfg.Cache.SeriesConfiguration().LRU
  1840  		)
  1841  
  1842  		if lruCfg != nil && lruCfg.EventsChannelSize > 0 {
  1843  			wiredListOpts.EventsChannelSize = int(lruCfg.EventsChannelSize)
  1844  		}
  1845  		wiredList := block.NewWiredList(wiredListOpts)
  1846  		blockOpts = blockOpts.SetWiredList(wiredList)
  1847  	}
  1848  	blockPool := block.NewDatabaseBlockPool(
  1849  		poolOptions(
  1850  			policy.BlockPool,
  1851  			scope.SubScope("block-pool")))
  1852  	blockPool.Init(func() block.DatabaseBlock {
  1853  		return block.NewDatabaseBlock(0, 0, ts.Segment{}, blockOpts, namespace.Context{})
  1854  	})
  1855  	blockOpts = blockOpts.SetDatabaseBlockPool(blockPool)
  1856  	opts = opts.SetDatabaseBlockOptions(blockOpts)
  1857  
  1858  	// NB(prateek): retention opts are overridden per namespace during series creation
  1859  	retentionOpts := retention.NewOptions()
  1860  	seriesOpts := storage.NewSeriesOptionsFromOptions(opts, retentionOpts).
  1861  		SetFetchBlockMetadataResultsPool(opts.FetchBlockMetadataResultsPool())
  1862  	seriesPool := series.NewDatabaseSeriesPool(
  1863  		poolOptions(
  1864  			policy.SeriesPool,
  1865  			scope.SubScope("series-pool")))
  1866  
  1867  	opts = opts.
  1868  		SetSeriesOptions(seriesOpts).
  1869  		SetDatabaseSeriesPool(seriesPool)
  1870  	opts = opts.SetCommitLogOptions(opts.CommitLogOptions().
  1871  		SetBytesPool(bytesPool).
  1872  		SetIdentifierPool(identifierPool))
  1873  
  1874  	postingsListOpts := poolOptions(policy.PostingsListPool, scope.SubScope("postingslist-pool"))
  1875  	postingsList := postings.NewPool(postingsListOpts, roaring.NewPostingsList)
  1876  
  1877  	queryResultsPool := index.NewQueryResultsPool(
  1878  		poolOptions(policy.IndexResultsPool, scope.SubScope("index-query-results-pool")))
  1879  	aggregateQueryResultsPool := index.NewAggregateResultsPool(
  1880  		poolOptions(policy.IndexResultsPool, scope.SubScope("index-aggregate-results-pool")))
  1881  	aggregateQueryValuesPool := index.NewAggregateValuesPool(
  1882  		poolOptions(policy.IndexResultsPool, scope.SubScope("index-aggregate-values-pool")))
  1883  
  1884  	// Set value transformation options.
  1885  	opts = opts.SetTruncateType(cfg.Transforms.TruncateBy)
  1886  	forcedValue := cfg.Transforms.ForcedValue
  1887  	if forcedValue != nil {
  1888  		opts = opts.SetWriteTransformOptions(series.WriteTransformOptions{
  1889  			ForceValueEnabled: true,
  1890  			ForceValue:        *forcedValue,
  1891  		})
  1892  	}
  1893  
  1894  	// Set index options.
  1895  	indexOpts := opts.IndexOptions().
  1896  		SetInstrumentOptions(iOpts).
  1897  		SetMemSegmentOptions(
  1898  			opts.IndexOptions().MemSegmentOptions().
  1899  				SetPostingsListPool(postingsList).
  1900  				SetInstrumentOptions(iOpts)).
  1901  		SetFSTSegmentOptions(
  1902  			opts.IndexOptions().FSTSegmentOptions().
  1903  				SetPostingsListPool(postingsList).
  1904  				SetInstrumentOptions(iOpts).
  1905  				SetContextPool(opts.ContextPool())).
  1906  		SetSegmentBuilderOptions(
  1907  			opts.IndexOptions().SegmentBuilderOptions().
  1908  				SetPostingsListPool(postingsList)).
  1909  		SetIdentifierPool(identifierPool).
  1910  		SetCheckedBytesPool(bytesPool).
  1911  		SetQueryResultsPool(queryResultsPool).
  1912  		SetAggregateResultsPool(aggregateQueryResultsPool).
  1913  		SetAggregateValuesPool(aggregateQueryValuesPool).
  1914  		SetForwardIndexProbability(cfg.Index.ForwardIndexProbability).
  1915  		SetForwardIndexThreshold(cfg.Index.ForwardIndexThreshold)
  1916  
  1917  	queryResultsPool.Init(func() index.QueryResults {
  1918  		// NB(r): Need to initialize after setting the index opts so
  1919  		// it sees the same reference to the options as is set for the DB.
  1920  		return index.NewQueryResults(nil, index.QueryResultsOptions{}, indexOpts)
  1921  	})
  1922  	aggregateQueryResultsPool.Init(func() index.AggregateResults {
  1923  		// NB(r): Need to initialize after setting the index opts so
  1924  		// it sees the same reference to the options as is set for the DB.
  1925  		return index.NewAggregateResults(nil, index.AggregateResultsOptions{}, indexOpts)
  1926  	})
  1927  	aggregateQueryValuesPool.Init(func() index.AggregateValues {
  1928  		// NB(r): Need to initialize after setting the index opts so
  1929  		// it sees the same reference to the options as is set for the DB.
  1930  		return index.NewAggregateValues(indexOpts)
  1931  	})
  1932  
  1933  	return opts.SetIndexOptions(indexOpts)
  1934  }
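
// For reference, an illustrative call (inferred from the signature rather
// than quoted from a specific call site): the server threads its
// storage.Options through this function once at startup, e.g.
//
//	opts = withEncodingAndPoolingOptions(cfg, logger, opts, cfg.PoolingPolicy)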
  1935  
  1936  func newAdminClient(
  1937  	config client.Configuration,
  1938  	clockOpts clock.Options,
  1939  	iOpts instrument.Options,
  1940  	tchannelOpts *tchannel.ChannelOptions,
  1941  	topologyInitializer topology.Initializer,
  1942  	runtimeOptsMgr m3dbruntime.OptionsManager,
  1943  	origin topology.Host,
  1944  	protoEnabled bool,
  1945  	schemaRegistry namespace.SchemaRegistry,
  1946  	kvStore kv.Store,
  1947  	contextPool xcontext.Pool,
  1948  	checkedBytesPool pool.CheckedBytesPool,
  1949  	identifierPool ident.Pool,
  1950  	logger *zap.Logger,
  1951  	custom []client.CustomAdminOption,
  1952  ) (client.AdminClient, error) {
  1953  	if config.EnvironmentConfig != nil {
  1954  		// If the user has provided an override for the dynamic client configuration
  1955  		// then we need to honor it by not passing our own topology initializer.
  1956  		topologyInitializer = nil
  1957  	}
  1958  
  1959  	// NB: append custom options coming from run options to existing options.
  1960  	options := []client.CustomAdminOption{
  1961  		func(opts client.AdminOptions) client.AdminOptions {
  1962  			return opts.SetChannelOptions(tchannelOpts).(client.AdminOptions)
  1963  		},
  1964  		func(opts client.AdminOptions) client.AdminOptions {
  1965  			return opts.SetRuntimeOptionsManager(runtimeOptsMgr).(client.AdminOptions)
  1966  		},
  1967  		func(opts client.AdminOptions) client.AdminOptions {
  1968  			return opts.SetContextPool(contextPool).(client.AdminOptions)
  1969  		},
  1970  		func(opts client.AdminOptions) client.AdminOptions {
  1971  			return opts.SetCheckedBytesPool(checkedBytesPool).(client.AdminOptions)
  1972  		},
  1973  		func(opts client.AdminOptions) client.AdminOptions {
  1974  			return opts.SetIdentifierPool(identifierPool).(client.AdminOptions)
  1975  		},
  1976  		func(opts client.AdminOptions) client.AdminOptions {
  1977  			return opts.SetOrigin(origin).(client.AdminOptions)
  1978  		},
  1979  		func(opts client.AdminOptions) client.AdminOptions {
  1980  			if protoEnabled {
  1981  				return opts.SetEncodingProto(encoding.NewOptions()).(client.AdminOptions)
  1982  			}
  1983  			return opts
  1984  		},
  1985  		func(opts client.AdminOptions) client.AdminOptions {
  1986  			return opts.SetSchemaRegistry(schemaRegistry).(client.AdminOptions)
  1987  		},
  1988  	}
  1989  
  1990  	options = append(options, custom...)
  1991  	m3dbClient, err := config.NewAdminClient(
  1992  		client.ConfigurationParameters{
  1993  			ClockOptions: clockOpts,
  1994  			InstrumentOptions: iOpts.
  1995  				SetMetricsScope(iOpts.MetricsScope().SubScope("m3dbclient")),
  1996  			TopologyInitializer: topologyInitializer,
  1997  		},
  1998  		options...,
  1999  	)
  2000  	if err != nil {
  2001  		return nil, err
  2002  	}
  2003  
  2004  	// Kick off runtime options manager KV watches.
  2005  	clientAdminOpts := m3dbClient.Options().(client.AdminOptions)
  2006  	kvWatchClientConsistencyLevels(kvStore, logger,
  2007  		clientAdminOpts, runtimeOptsMgr)
  2008  	return m3dbClient, nil
  2009  }
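
// exampleCustomAdminOption is an illustrative sketch, not part of the
// original file: the shape of a client.CustomAdminOption passed through the
// `custom` parameter above. The use of SetInstrumentOptions is an
// assumption; any AdminOptions setter that round-trips through the type
// assertion works the same way.
func exampleCustomAdminOption(iOpts instrument.Options) client.CustomAdminOption {
	return func(opts client.AdminOptions) client.AdminOptions {
		return opts.SetInstrumentOptions(iOpts).(client.AdminOptions)
	}
}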
  2010  
  2011  func poolOptions(
  2012  	policy config.PoolPolicy,
  2013  	scope tally.Scope,
  2014  ) pool.ObjectPoolOptions {
  2015  	var (
  2016  		opts                = pool.NewObjectPoolOptions()
  2017  		size                = policy.SizeOrDefault()
  2018  		refillLowWaterMark  = policy.RefillLowWaterMarkOrDefault()
  2019  		refillHighWaterMark = policy.RefillHighWaterMarkOrDefault()
  2020  	)
  2021  
  2022  	if size > 0 {
  2023  		opts = opts.SetSize(int(size))
  2024  		if refillLowWaterMark > 0 &&
  2025  			refillHighWaterMark > 0 &&
  2026  			refillHighWaterMark > refillLowWaterMark {
  2027  			opts = opts.
  2028  				SetRefillLowWatermark(refillLowWaterMark).
  2029  				SetRefillHighWatermark(refillHighWaterMark)
  2030  		}
  2031  	}
  2032  	opts = opts.SetDynamic(size.IsDynamic())
  2033  
  2034  	if scope != nil {
  2035  		opts = opts.SetInstrumentOptions(opts.InstrumentOptions().
  2036  			SetMetricsScope(scope))
  2037  	}
  2038  	return opts
  2039  }
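
// Illustrative note, not part of the original file: watermarks only take
// effect when both are set and ordered. For example, a policy with
// size=1024, refillLowWaterMark=0.7 and refillHighWaterMark=0.9 refills the
// pool in the background once it drains below 70% of capacity and stops at
// 90%; with the watermarks left at zero, background refill is disabled.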
  2040  
  2041  func capacityPoolOptions(
  2042  	policy config.CapacityPoolPolicy,
  2043  	scope tally.Scope,
  2044  ) pool.ObjectPoolOptions {
  2045  	var (
  2046  		opts                = pool.NewObjectPoolOptions()
  2047  		size                = policy.SizeOrDefault()
  2048  		refillLowWaterMark  = policy.RefillLowWaterMarkOrDefault()
  2049  		refillHighWaterMark = policy.RefillHighWaterMarkOrDefault()
  2050  	)
  2051  
  2052  	if size > 0 {
  2053  		opts = opts.SetSize(int(size))
  2054  		if refillLowWaterMark > 0 &&
  2055  			refillHighWaterMark > 0 &&
  2056  			refillHighWaterMark > refillLowWaterMark {
  2057  			opts = opts.SetRefillLowWatermark(refillLowWaterMark)
  2058  			opts = opts.SetRefillHighWatermark(refillHighWaterMark)
  2059  		}
  2060  	}
  2061  	opts = opts.SetDynamic(size.IsDynamic())
  2062  
  2063  	if scope != nil {
  2064  		opts = opts.SetInstrumentOptions(opts.InstrumentOptions().
  2065  			SetMetricsScope(scope))
  2066  	}
  2067  	return opts
  2068  }
  2069  
  2070  func maxCapacityPoolOptions(
  2071  	policy config.MaxCapacityPoolPolicy,
  2072  	scope tally.Scope,
  2073  ) pool.ObjectPoolOptions {
  2074  	var (
  2075  		opts                = pool.NewObjectPoolOptions()
  2076  		size                = policy.SizeOrDefault()
  2077  		refillLowWaterMark  = policy.RefillLowWaterMarkOrDefault()
  2078  		refillHighWaterMark = policy.RefillHighWaterMarkOrDefault()
  2079  	)
  2080  
  2081  	if size > 0 {
  2082  		opts = opts.SetSize(int(size))
  2083  		if refillLowWaterMark > 0 &&
  2084  			refillHighWaterMark > 0 &&
  2085  			refillHighWaterMark > refillLowWaterMark {
  2086  			opts = opts.SetRefillLowWatermark(refillLowWaterMark)
  2087  			opts = opts.SetRefillHighWatermark(refillHighWaterMark)
  2088  		}
  2089  	}
  2090  	opts = opts.SetDynamic(size.IsDynamic())
  2091  
  2092  	if scope != nil {
  2093  		opts = opts.SetInstrumentOptions(opts.InstrumentOptions().
  2094  			SetMetricsScope(scope))
  2095  	}
  2096  	return opts
  2097  }
  2098  
  2099  func hostSupportsHugeTLB() (bool, error) {
  2100  	// Try to determine whether the host supports HugeTLB in the first place
  2101  	withHugeTLB, err := mmap.Bytes(10, mmap.Options{
  2102  		HugeTLB: mmap.HugeTLBOptions{
  2103  			Enabled:   true,
  2104  			Threshold: 0,
  2105  		},
  2106  	})
  2107  	if err != nil {
  2108  		return false, fmt.Errorf("could not mmap anonymous region: %v", err)
  2109  	}
  2110  	defer mmap.Munmap(withHugeTLB)
  2111  
  2112  	if withHugeTLB.Warning == nil {
  2113  		// If there was no warning, then the host didn't complain about
  2114  		// use of huge TLB.
  2115  		return true, nil
  2116  	}
  2117  
  2118  	// If we got a warning, try mmap'ing without HugeTLB
  2119  	withoutHugeTLB, err := mmap.Bytes(10, mmap.Options{})
  2120  	if err != nil {
  2121  		return false, fmt.Errorf("could not mmap anonymous region: %v", err)
  2122  	}
  2123  	defer mmap.Munmap(withoutHugeTLB)
  2124  	if withoutHugeTLB.Warning == nil {
  2125  		// The machine doesn't support HugeTLB, proceed without it
  2126  		return false, nil
  2127  	}
  2128  	// The warning was probably caused by something else, proceed using HugeTLB
  2129  	return true, nil
  2130  }
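
// exampleHugeTLBProbe is an illustrative sketch, not part of the original
// file: callers probe once at startup and degrade gracefully when the probe
// itself fails.
func exampleHugeTLBProbe(logger *zap.Logger) bool {
	supported, err := hostSupportsHugeTLB()
	if err != nil {
		logger.Warn("could not determine HugeTLB support, disabling", zap.Error(err))
		return false
	}
	return supported
}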
  2131  
  2132  func newTopoMapProvider(t topology.Topology) *topoMapProvider {
  2133  	return &topoMapProvider{t}
  2134  }
  2135  
  2136  type topoMapProvider struct {
  2137  	t topology.Topology
  2138  }
  2139  
  2140  func (t *topoMapProvider) TopologyMap() (topology.Map, error) {
  2141  	if t.t == nil {
  2142  		return nil, errors.New("topology map provider has not been set yet")
  2143  	}
  2144  
  2145  	return t.t.Get(), nil
  2146  }
  2147  
  2148  // Ensure mmap reporter implements mmap.Reporter
  2149  var _ mmap.Reporter = (*mmapReporter)(nil)
  2150  
  2151  type mmapReporter struct {
  2152  	sync.Mutex
  2153  	scope   tally.Scope
  2154  	entries map[string]*mmapReporterEntry
  2155  }
  2156  
  2157  type mmapReporterEntry struct {
  2158  	value int64
  2159  	gauge tally.Gauge
  2160  }
  2161  
  2162  func newMmapReporter(scope tally.Scope) *mmapReporter {
  2163  	return &mmapReporter{
  2164  		scope:   scope,
  2165  		entries: make(map[string]*mmapReporterEntry),
  2166  	}
  2167  }
  2168  
  2169  func (r *mmapReporter) Run(ctx context.Context) {
  2170  	ticker := time.NewTicker(30 * time.Second)
  2171  	defer ticker.Stop()
  2172  
  2173  	for {
  2174  		select {
  2175  		case <-ctx.Done():
  2176  			return
  2177  		case <-ticker.C:
  2178  			r.Lock()
  2179  			for _, entry := range r.entries {
  2180  				entry.gauge.Update(float64(entry.value))
  2181  			}
  2182  			r.Unlock()
  2183  		}
  2184  	}
  2185  }
  2186  
  2187  func (r *mmapReporter) entryKeyAndTags(ctx mmap.Context) (string, map[string]string) {
  2188  	numTags := 1
  2189  	if ctx.Metadata != nil {
  2190  		numTags += len(ctx.Metadata)
  2191  	}
  2192  
  2193  	tags := make(map[string]string, numTags)
  2194  	tags[mmapReporterTagName] = ctx.Name
  2195  	if ctx.Metadata != nil {
  2196  		for k, v := range ctx.Metadata {
  2197  			tags[k] = v
  2198  		}
  2199  	}
  2200  
  2201  	entryKey := tally.KeyForStringMap(tags)
  2202  	return entryKey, tags
  2203  }
  2204  
  2205  func (r *mmapReporter) ReportMap(ctx mmap.Context) error {
  2206  	if ctx.Name == "" {
  2207  		return fmt.Errorf("report mmap map missing context name: %+v", ctx)
  2208  	}
  2209  
  2210  	entryKey, entryTags := r.entryKeyAndTags(ctx)
  2211  
  2212  	r.Lock()
  2213  	defer r.Unlock()
  2214  
  2215  	entry, ok := r.entries[entryKey]
  2216  	if !ok {
  2217  		entry = &mmapReporterEntry{
  2218  			gauge: r.scope.Tagged(entryTags).Gauge(mmapReporterMetricName),
  2219  		}
  2220  		r.entries[entryKey] = entry
  2221  	}
  2222  
  2223  	entry.value += ctx.Size
  2224  
  2225  	return nil
  2226  }
  2227  
  2228  func (r *mmapReporter) ReportUnmap(ctx mmap.Context) error {
  2229  	if ctx.Name == "" {
  2230  		return fmt.Errorf("report mmap unmap missing context name: %+v", ctx)
  2231  	}
  2232  
  2233  	entryKey, _ := r.entryKeyAndTags(ctx)
  2234  
  2235  	r.Lock()
  2236  	defer r.Unlock()
  2237  
  2238  	entry, ok := r.entries[entryKey]
  2239  	if !ok {
  2240  		return fmt.Errorf("report mmap unmap missing entry for context: %+v", ctx)
  2241  	}
  2242  
  2243  	entry.value -= ctx.Size
  2244  
  2245  	if entry.value == 0 {
  2246  		// No more similar mmaps active for this context name, garbage collect
  2247  		delete(r.entries, entryKey)
  2248  	}
  2249  
  2250  	return nil
  2251  }
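
// exampleMmapReporterUsage is an illustrative sketch, not part of the
// original file: the reporter's lifecycle end to end. Start the gauge-flush
// loop, report a map, then report the matching unmap so the entry is
// garbage collected. The context name and size are hypothetical.
func exampleMmapReporterUsage(scope tally.Scope) error {
	reporter := newMmapReporter(scope)

	// Flush gauges every 30 seconds until cancelled.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go reporter.Run(ctx)

	mmapCtx := mmap.Context{Name: "example-segment", Size: 4096}
	if err := reporter.ReportMap(mmapCtx); err != nil {
		return err
	}
	return reporter.ReportUnmap(mmapCtx)
}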