github.com/m3db/m3@v1.5.0/src/dbnode/server/server.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 // Package server contains the code to run the dbnode server. 22 package server 23 24 import ( 25 "context" 26 "errors" 27 "fmt" 28 "io" 29 "math" 30 "net/http" 31 "os" 32 "path" 33 "runtime" 34 "runtime/debug" 35 "strings" 36 "sync" 37 "time" 38 39 clusterclient "github.com/m3db/m3/src/cluster/client" 40 "github.com/m3db/m3/src/cluster/client/etcd" 41 "github.com/m3db/m3/src/cluster/generated/proto/commonpb" 42 "github.com/m3db/m3/src/cluster/generated/proto/kvpb" 43 "github.com/m3db/m3/src/cluster/kv" 44 "github.com/m3db/m3/src/cluster/placement" 45 "github.com/m3db/m3/src/cluster/placementhandler" 46 "github.com/m3db/m3/src/cluster/placementhandler/handleroptions" 47 "github.com/m3db/m3/src/cmd/services/m3dbnode/config" 48 "github.com/m3db/m3/src/dbnode/client" 49 "github.com/m3db/m3/src/dbnode/encoding" 50 "github.com/m3db/m3/src/dbnode/encoding/m3tsz" 51 "github.com/m3db/m3/src/dbnode/encoding/proto" 52 "github.com/m3db/m3/src/dbnode/environment" 53 "github.com/m3db/m3/src/dbnode/kvconfig" 54 "github.com/m3db/m3/src/dbnode/namespace" 55 hjcluster "github.com/m3db/m3/src/dbnode/network/server/httpjson/cluster" 56 hjnode "github.com/m3db/m3/src/dbnode/network/server/httpjson/node" 57 "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift" 58 ttcluster "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/cluster" 59 ttnode "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/node" 60 "github.com/m3db/m3/src/dbnode/persist/fs" 61 "github.com/m3db/m3/src/dbnode/persist/fs/commitlog" 62 "github.com/m3db/m3/src/dbnode/ratelimit" 63 "github.com/m3db/m3/src/dbnode/retention" 64 m3dbruntime "github.com/m3db/m3/src/dbnode/runtime" 65 "github.com/m3db/m3/src/dbnode/sharding" 66 "github.com/m3db/m3/src/dbnode/storage" 67 "github.com/m3db/m3/src/dbnode/storage/block" 68 "github.com/m3db/m3/src/dbnode/storage/bootstrap/result" 69 "github.com/m3db/m3/src/dbnode/storage/cluster" 70 "github.com/m3db/m3/src/dbnode/storage/index" 71 "github.com/m3db/m3/src/dbnode/storage/limits" 72 "github.com/m3db/m3/src/dbnode/storage/limits/permits" 73 "github.com/m3db/m3/src/dbnode/storage/series" 74 "github.com/m3db/m3/src/dbnode/topology" 75 "github.com/m3db/m3/src/dbnode/ts" 76 "github.com/m3db/m3/src/dbnode/ts/writes" 77 xtchannel "github.com/m3db/m3/src/dbnode/x/tchannel" 78 
"github.com/m3db/m3/src/dbnode/x/xio" 79 "github.com/m3db/m3/src/dbnode/x/xpool" 80 m3ninxindex "github.com/m3db/m3/src/m3ninx/index" 81 "github.com/m3db/m3/src/m3ninx/postings" 82 "github.com/m3db/m3/src/m3ninx/postings/roaring" 83 "github.com/m3db/m3/src/x/clock" 84 xconfig "github.com/m3db/m3/src/x/config" 85 xcontext "github.com/m3db/m3/src/x/context" 86 xdebug "github.com/m3db/m3/src/x/debug" 87 extdebug "github.com/m3db/m3/src/x/debug/ext" 88 xdocs "github.com/m3db/m3/src/x/docs" 89 "github.com/m3db/m3/src/x/ident" 90 "github.com/m3db/m3/src/x/instrument" 91 "github.com/m3db/m3/src/x/mmap" 92 xos "github.com/m3db/m3/src/x/os" 93 "github.com/m3db/m3/src/x/pool" 94 "github.com/m3db/m3/src/x/serialize" 95 96 apachethrift "github.com/apache/thrift/lib/go/thrift" 97 "github.com/m3dbx/vellum/levenshtein" 98 "github.com/m3dbx/vellum/levenshtein2" 99 "github.com/m3dbx/vellum/regexp" 100 "github.com/opentracing/opentracing-go" 101 "github.com/uber-go/tally" 102 "github.com/uber/tchannel-go" 103 "go.etcd.io/etcd/server/v3/embed" 104 "go.uber.org/zap" 105 ) 106 107 const ( 108 bootstrapConfigInitTimeout = 10 * time.Second 109 serverGracefulCloseTimeout = 10 * time.Second 110 debugServerGracefulCloseTimeout = 2 * time.Second 111 bgProcessLimitInterval = 10 * time.Second 112 maxBgProcessLimitMonitorDuration = 5 * time.Minute 113 cpuProfileDuration = 5 * time.Second 114 filePathPrefixLockFile = ".lock" 115 defaultServiceName = "m3dbnode" 116 skipRaiseProcessLimitsEnvVar = "SKIP_PROCESS_LIMITS_RAISE" 117 skipRaiseProcessLimitsEnvVarTrue = "true" 118 mmapReporterMetricName = "mmap-mapped-bytes" 119 mmapReporterTagName = "map-name" 120 ) 121 122 // RunOptions provides options for running the server 123 // with backwards compatibility if only solely adding fields. 124 type RunOptions struct { 125 // ConfigFile is the YAML configuration file to use to run the server. 126 ConfigFile string 127 128 // Config is an alternate way to provide configuration and will be used 129 // instead of parsing ConfigFile if ConfigFile is not specified. 130 Config config.DBConfiguration 131 132 // BootstrapCh is a channel to listen on to be notified of bootstrap. 133 BootstrapCh chan<- struct{} 134 135 // EmbeddedKVCh is a channel to listen on to be notified that the embedded KV has bootstrapped. 136 EmbeddedKVCh chan<- struct{} 137 138 // ClientCh is a channel to listen on to share the same m3db client that this server uses. 139 ClientCh chan<- client.Client 140 141 // ClusterClientCh is a channel to listen on to share the same m3 cluster client that this server uses. 142 ClusterClientCh chan<- clusterclient.Client 143 144 // KVStoreCh is a channel to listen on to share the same m3 kv store client that this server uses. 145 KVStoreCh chan<- kv.Store 146 147 // InterruptCh is a programmatic interrupt channel to supply to 148 // interrupt and shutdown the server. 149 InterruptCh <-chan error 150 151 // ShutdownCh is an optional channel to supply if interested in receiving 152 // a notification that the server has shutdown. 153 ShutdownCh chan<- struct{} 154 155 // CustomOptions are custom options to apply to the session. 156 CustomOptions []client.CustomAdminOption 157 158 // Transform is a function to transform the Options. 159 Transform storage.OptionTransform 160 161 // StorageOptions are additional storage options. 162 StorageOptions StorageOptions 163 164 // CustomBuildTags are additional tags to be added to the instrument build 165 // reporter. 
166 CustomBuildTags map[string]string 167 } 168 169 // Run runs the server programmatically given a filename for the 170 // configuration file. 171 func Run(runOpts RunOptions) { 172 var cfg config.DBConfiguration 173 if runOpts.ConfigFile != "" { 174 var rootCfg config.Configuration 175 if err := xconfig.LoadFile(&rootCfg, runOpts.ConfigFile, xconfig.Options{}); err != nil { 176 // NB(r): Use fmt.Fprintf(os.Stderr, ...) to avoid etcd.SetGlobals() 177 // sending stdlib "log" to black hole. Don't remove unless with good reason. 178 fmt.Fprintf(os.Stderr, "unable to load %s: %v", runOpts.ConfigFile, err) 179 os.Exit(1) 180 } 181 182 cfg = *rootCfg.DB 183 } else { 184 cfg = runOpts.Config 185 } 186 187 err := cfg.Validate() 188 if err != nil { 189 // NB(r): Use fmt.Fprintf(os.Stderr, ...) to avoid etcd.SetGlobals() 190 // sending stdlib "log" to black hole. Don't remove unless with good reason. 191 fmt.Fprintf(os.Stderr, "error initializing config defaults and validating config: %v", err) 192 os.Exit(1) 193 } 194 195 logger, err := cfg.LoggingOrDefault().BuildLogger() 196 if err != nil { 197 // NB(r): Use fmt.Fprintf(os.Stderr, ...) to avoid etcd.SetGlobals() 198 // sending stdlib "log" to black hole. Don't remove unless with good reason. 199 fmt.Fprintf(os.Stderr, "unable to create logger: %v", err) 200 os.Exit(1) 201 } 202 203 // NB(nate): Register shutdown notification defer function first so that 204 // it's the last defer to fire before terminating. This allows other defer methods 205 // that clean up resources to execute first. 206 if runOpts.ShutdownCh != nil { 207 defer func() { 208 select { 209 case runOpts.ShutdownCh <- struct{}{}: 210 break 211 default: 212 logger.Warn("could not send shutdown notification as channel was full") 213 } 214 }() 215 } 216 217 interruptOpts := xos.NewInterruptOptions() 218 if runOpts.InterruptCh != nil { 219 interruptOpts.InterruptCh = runOpts.InterruptCh 220 } 221 intWatchCancel := xos.WatchForInterrupt(logger, interruptOpts) 222 defer intWatchCancel() 223 224 defer logger.Sync() 225 226 cfg.Debug.SetRuntimeValues(logger) 227 228 xconfig.WarnOnDeprecation(cfg, logger) 229 230 // By default attempt to raise process limits, which is a benign operation. 231 skipRaiseLimits := strings.TrimSpace(os.Getenv(skipRaiseProcessLimitsEnvVar)) 232 if skipRaiseLimits != skipRaiseProcessLimitsEnvVarTrue { 233 // Raise fd limits to nr_open system limit 234 result, err := xos.RaiseProcessNoFileToNROpen() 235 if err != nil { 236 logger.Warn("unable to raise rlimit", zap.Error(err)) 237 } else { 238 logger.Info("raised rlimit no file fds limit", 239 zap.Bool("required", result.RaisePerformed), 240 zap.Uint64("sysNROpenValue", result.NROpenValue), 241 zap.Uint64("noFileMaxValue", result.NoFileMaxValue), 242 zap.Uint64("noFileCurrValue", result.NoFileCurrValue)) 243 } 244 } 245 246 // Parse file and directory modes 247 newFileMode, err := cfg.Filesystem.ParseNewFileMode() 248 if err != nil { 249 logger.Fatal("could not parse new file mode", zap.Error(err)) 250 } 251 252 newDirectoryMode, err := cfg.Filesystem.ParseNewDirectoryMode() 253 if err != nil { 254 logger.Fatal("could not parse new directory mode", zap.Error(err)) 255 } 256 257 // Obtain a lock on `filePathPrefix`, or exit if another process already has it. 258 // The lock consists of a lock file (on the file system) and a lock in memory. 259 // When the process exits gracefully, both the lock file and the lock will be removed. 
260 // If the process exits ungracefully, only the lock in memory will be removed; the lock 261 // file will remain on the file system. When a dbnode starts after an ungraceful stop, 262 // it will be able to acquire the lock despite the fact that the lock file exists. 263 lockPath := path.Join(cfg.Filesystem.FilePathPrefixOrDefault(), filePathPrefixLockFile) 264 fslock, err := createAndAcquireLockfile(lockPath, newDirectoryMode) 265 if err != nil { 266 logger.Fatal("could not acquire lock", zap.String("path", lockPath), zap.Error(err)) 267 } 268 // nolint: errcheck 269 defer fslock.releaseLockfile() 270 271 go bgValidateProcessLimits(logger) 272 debug.SetGCPercent(cfg.GCPercentageOrDefault()) 273 274 defaultServeMux := http.NewServeMux() 275 scope, _, _, err := cfg.MetricsOrDefault().NewRootScopeAndReporters( 276 instrument.NewRootScopeAndReportersOptions{ 277 PrometheusDefaultServeMux: defaultServeMux, 278 }) 279 if err != nil { 280 logger.Fatal("could not connect to metrics", zap.Error(err)) 281 } 282 283 hostID, err := cfg.HostIDOrDefault().Resolve() 284 if err != nil { 285 logger.Fatal("could not resolve local host ID", zap.Error(err)) 286 } 287 288 var ( 289 tracer opentracing.Tracer 290 traceCloser io.Closer 291 ) 292 293 if cfg.Tracing == nil { 294 tracer = opentracing.NoopTracer{} 295 logger.Info("tracing disabled; set `tracing.backend` to enable") 296 } else { 297 // setup tracer 298 serviceName := cfg.Tracing.ServiceName 299 if serviceName == "" { 300 serviceName = defaultServiceName 301 } 302 tracer, traceCloser, err = cfg.Tracing.NewTracer(serviceName, scope.SubScope("jaeger"), logger) 303 if err != nil { 304 tracer = opentracing.NoopTracer{} 305 logger.Warn("could not initialize tracing; using no-op tracer instead", 306 zap.String("service", serviceName), zap.Error(err)) 307 } else { 308 defer traceCloser.Close() 309 logger.Info("tracing enabled", zap.String("service", serviceName)) 310 } 311 } 312 313 // Presence of KV server config indicates embedded etcd cluster 314 discoveryConfig := cfg.DiscoveryOrDefault() 315 envConfig, err := discoveryConfig.EnvironmentConfig(hostID) 316 if err != nil { 317 logger.Fatal("could not get env config from discovery config", zap.Error(err)) 318 } 319 320 if envConfig.SeedNodes == nil { 321 logger.Info("no seed nodes set, using dedicated etcd cluster") 322 } else { 323 // Default etcd client clusters if not set already 324 service, err := envConfig.Services.SyncCluster() 325 if err != nil { 326 logger.Fatal("invalid cluster configuration", zap.Error(err)) 327 } 328 329 clusters := service.Service.ETCDClusters 330 seedNodes := envConfig.SeedNodes.InitialCluster 331 if len(clusters) == 0 { 332 endpoints, err := config.InitialClusterEndpoints(seedNodes) 333 if err != nil { 334 logger.Fatal("unable to create etcd clusters", zap.Error(err)) 335 } 336 337 zone := service.Service.Zone 338 339 logger.Info("using seed nodes etcd cluster", 340 zap.String("zone", zone), zap.Strings("endpoints", endpoints)) 341 service.Service.ETCDClusters = []etcd.ClusterConfig{{ 342 Zone: zone, 343 Endpoints: endpoints, 344 }} 345 } 346 347 seedNodeHostIDs := make([]string, 0, len(seedNodes)) 348 for _, entry := range seedNodes { 349 seedNodeHostIDs = append(seedNodeHostIDs, entry.HostID) 350 } 351 logger.Info("resolving seed node configuration", 352 zap.String("hostID", hostID), zap.Strings("seedNodeHostIDs", seedNodeHostIDs), 353 ) 354 355 if !config.IsSeedNode(seedNodes, hostID) { 356 logger.Info("not a seed node, using cluster seed nodes") 357 } else { 358
logger.Info("seed node, starting etcd server") 359 360 etcdCfg, err := config.NewEtcdEmbedConfig(cfg) 361 if err != nil { 362 logger.Fatal("unable to create etcd config", zap.Error(err)) 363 } 364 365 e, err := embed.StartEtcd(etcdCfg) 366 if err != nil { 367 logger.Fatal("could not start embedded etcd", zap.Error(err)) 368 } 369 370 if runOpts.EmbeddedKVCh != nil { 371 // Notify on embedded KV bootstrap chan if specified 372 runOpts.EmbeddedKVCh <- struct{}{} 373 } 374 375 defer e.Close() 376 } 377 } 378 379 // By default use histogram timers for timers that 380 // are constructed allowing for type to be picked 381 // by the caller using instrument.NewTimer(...). 382 timerOpts := instrument.NewHistogramTimerOptions(instrument.HistogramTimerOptions{}) 383 timerOpts.StandardSampleRate = cfg.MetricsOrDefault().SampleRate() 384 385 var ( 386 opts = storage.NewOptions() 387 iOpts = opts.InstrumentOptions(). 388 SetLogger(logger). 389 SetMetricsScope(scope). 390 SetTimerOptions(timerOpts). 391 SetTracer(tracer). 392 SetCustomBuildTags(runOpts.CustomBuildTags) 393 ) 394 opts = opts.SetInstrumentOptions(iOpts) 395 396 // Only override the default MemoryTracker (which has default limits) if a custom limit has 397 // been set. 398 if cfg.Limits.MaxOutstandingRepairedBytes > 0 { 399 memTrackerOptions := storage.NewMemoryTrackerOptions(cfg.Limits.MaxOutstandingRepairedBytes) 400 memTracker := storage.NewMemoryTracker(memTrackerOptions) 401 opts = opts.SetMemoryTracker(memTracker) 402 } 403 404 opentracing.SetGlobalTracer(tracer) 405 406 // Set global index options. 407 if n := cfg.Index.RegexpDFALimitOrDefault(); n > 0 { 408 regexp.SetStateLimit(n) 409 levenshtein.SetStateLimit(n) 410 levenshtein2.SetStateLimit(n) 411 } 412 if n := cfg.Index.RegexpFSALimitOrDefault(); n > 0 { 413 regexp.SetDefaultLimit(n) 414 } 415 416 buildReporter := instrument.NewBuildReporter(iOpts) 417 if err := buildReporter.Start(); err != nil { 418 logger.Fatal("unable to start build reporter", zap.Error(err)) 419 } 420 defer buildReporter.Stop() 421 422 mmapCfg := cfg.Filesystem.MmapConfigurationOrDefault() 423 shouldUseHugeTLB := mmapCfg.HugeTLB.Enabled 424 if shouldUseHugeTLB { 425 // Make sure the host supports HugeTLB before proceeding with it to prevent 426 // excessive log spam. 427 shouldUseHugeTLB, err = hostSupportsHugeTLB() 428 if err != nil { 429 logger.Fatal("could not determine if host supports HugeTLB", zap.Error(err)) 430 } 431 if !shouldUseHugeTLB { 432 logger.Warn("host doesn't support HugeTLB, proceeding without it") 433 } 434 } 435 436 mmapReporter := newMmapReporter(scope) 437 mmapReporterCtx, cancel := context.WithCancel(context.Background()) 438 defer cancel() 439 go mmapReporter.Run(mmapReporterCtx) 440 opts = opts.SetMmapReporter(mmapReporter) 441 442 runtimeOpts := m3dbruntime.NewOptions(). 443 SetPersistRateLimitOptions(ratelimit.NewOptions(). 444 SetLimitEnabled(true). 445 SetLimitMbps(cfg.Filesystem.ThroughputLimitMbpsOrDefault()). 446 SetLimitCheckEvery(cfg.Filesystem.ThroughputCheckEveryOrDefault())). 447 SetWriteNewSeriesAsync(cfg.WriteNewSeriesAsyncOrDefault()). 448 SetWriteNewSeriesBackoffDuration(cfg.WriteNewSeriesBackoffDurationOrDefault()) 449 450 if lruCfg := cfg.Cache.SeriesConfiguration().LRU; lruCfg != nil { 451 runtimeOpts = runtimeOpts.SetMaxWiredBlocks(lruCfg.MaxBlocks) 452 } 453 454 // Setup query stats tracking. 
455 var ( 456 docsLimit = limits.DefaultLookbackLimitOptions() 457 bytesReadLimit = limits.DefaultLookbackLimitOptions() 458 diskSeriesReadLimit = limits.DefaultLookbackLimitOptions() 459 aggDocsLimit = limits.DefaultLookbackLimitOptions() 460 ) 461 462 if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedSeriesBlocks; limitConfig != nil { 463 docsLimit.Limit = limitConfig.Value 464 docsLimit.Lookback = limitConfig.Lookback 465 } 466 if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedSeriesDiskBytesRead; limitConfig != nil { 467 bytesReadLimit.Limit = limitConfig.Value 468 bytesReadLimit.Lookback = limitConfig.Lookback 469 } 470 if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedSeriesDiskRead; limitConfig != nil { 471 diskSeriesReadLimit.Limit = limitConfig.Value 472 diskSeriesReadLimit.Lookback = limitConfig.Lookback 473 } 474 if limitConfig := runOpts.Config.Limits.MaxRecentlyQueriedMetadata; limitConfig != nil { 475 aggDocsLimit.Limit = limitConfig.Value 476 aggDocsLimit.Lookback = limitConfig.Lookback 477 } 478 limitOpts := limits.NewOptions(). 479 SetDocsLimitOpts(docsLimit). 480 SetBytesReadLimitOpts(bytesReadLimit). 481 SetDiskSeriesReadLimitOpts(diskSeriesReadLimit). 482 SetAggregateDocsLimitOpts(aggDocsLimit). 483 SetInstrumentOptions(iOpts) 484 if builder := opts.SourceLoggerBuilder(); builder != nil { 485 limitOpts = limitOpts.SetSourceLoggerBuilder(builder) 486 } 487 opts = opts.SetLimitsOptions(limitOpts) 488 489 seriesReadPermits := permits.NewLookbackLimitPermitsManager( 490 "disk-series-read", 491 diskSeriesReadLimit, 492 iOpts, 493 limitOpts.SourceLoggerBuilder(), 494 ) 495 496 permitOptions := opts.PermitsOptions().SetSeriesReadPermitsManager(seriesReadPermits) 497 maxIdxConcurrency := int(math.Ceil(float64(runtime.GOMAXPROCS(0)) / 2)) 498 if cfg.Index.MaxQueryIDsConcurrency > 0 { 499 maxIdxConcurrency = cfg.Index.MaxQueryIDsConcurrency 500 logger.Info("max index query IDs concurrency set", 501 zap.Int("maxIdxConcurrency", maxIdxConcurrency)) 502 } else { 503 logger.Info("max index query IDs concurrency was not set, falling back to default value", 504 zap.Int("maxIdxConcurrency", maxIdxConcurrency)) 505 } 506 maxWorkerTime := time.Second 507 if cfg.Index.MaxWorkerTime > 0 { 508 maxWorkerTime = cfg.Index.MaxWorkerTime 509 logger.Info("max index worker time set", 510 zap.Duration("maxWorkerTime", maxWorkerTime)) 511 } else { 512 logger.Info("max index worker time was not set, falling back to default value", 513 zap.Duration("maxWorkerTime", maxWorkerTime)) 514 } 515 opts = opts.SetPermitsOptions(permitOptions.SetIndexQueryPermitsManager( 516 permits.NewFixedPermitsManager(maxIdxConcurrency, int64(maxWorkerTime), iOpts))) 517 518 // Setup postings list cache. 519 var ( 520 plCacheConfig = cfg.Cache.PostingsListConfiguration() 521 plCacheSize = plCacheConfig.SizeOrDefault() 522 plCacheOptions = index.PostingsListCacheOptions{ 523 InstrumentOptions: opts.InstrumentOptions(). 
524 SetMetricsScope(scope.SubScope("postings-list-cache")), 525 } 526 ) 527 segmentPostingsListCache, err := index.NewPostingsListCache(plCacheSize, plCacheOptions) 528 if err != nil { 529 logger.Fatal("could not construct segment postings list cache", zap.Error(err)) 530 } 531 532 segmentStopReporting := segmentPostingsListCache.Start() 533 defer segmentStopReporting() 534 535 searchPostingsListCache, err := index.NewPostingsListCache(plCacheSize, plCacheOptions) 536 if err != nil { 537 logger.Fatal("could not construct searches postings list cache", zap.Error(err)) 538 } 539 540 searchStopReporting := searchPostingsListCache.Start() 541 defer searchStopReporting() 542 543 // Setup index regexp compilation cache. 544 m3ninxindex.SetRegexpCacheOptions(m3ninxindex.RegexpCacheOptions{ 545 Size: cfg.Cache.RegexpConfiguration().SizeOrDefault(), 546 Scope: iOpts.MetricsScope(), 547 }) 548 549 if runOpts.Transform != nil { 550 opts = runOpts.Transform(opts) 551 } 552 553 queryLimits, err := limits.NewQueryLimits(opts.LimitsOptions()) 554 if err != nil { 555 logger.Fatal("could not construct docs query limits from config", zap.Error(err)) 556 } 557 558 queryLimits.Start() 559 defer queryLimits.Stop() 560 seriesReadPermits.Start() 561 defer seriesReadPermits.Stop() 562 563 // FOLLOWUP(prateek): remove this once we have the runtime options<->index wiring done 564 indexOpts := opts.IndexOptions() 565 insertMode := index.InsertSync 566 567 if cfg.WriteNewSeriesAsyncOrDefault() { 568 insertMode = index.InsertAsync 569 } 570 indexOpts = indexOpts.SetInsertMode(insertMode). 571 SetPostingsListCache(segmentPostingsListCache). 572 SetSearchPostingsListCache(searchPostingsListCache). 573 SetReadThroughSegmentOptions(index.ReadThroughSegmentOptions{ 574 CacheRegexp: plCacheConfig.CacheRegexpOrDefault(), 575 CacheTerms: plCacheConfig.CacheTermsOrDefault(), 576 CacheSearches: plCacheConfig.CacheSearchOrDefault(), 577 }). 578 SetMmapReporter(mmapReporter). 579 SetQueryLimits(queryLimits) 580 581 opts = opts.SetIndexOptions(indexOpts) 582 583 if tick := cfg.Tick; tick != nil { 584 runtimeOpts = runtimeOpts. 585 SetTickSeriesBatchSize(tick.SeriesBatchSize). 586 SetTickPerSeriesSleepDuration(tick.PerSeriesSleepDuration). 587 SetTickMinimumInterval(tick.MinimumInterval) 588 } 589 590 runtimeOptsMgr := m3dbruntime.NewOptionsManager() 591 if err := runtimeOptsMgr.Update(runtimeOpts); err != nil { 592 logger.Fatal("could not set initial runtime options", zap.Error(err)) 593 } 594 defer runtimeOptsMgr.Close() 595 596 opts = opts.SetRuntimeOptionsManager(runtimeOptsMgr) 597 598 policy, err := cfg.PoolingPolicyOrDefault() 599 if err != nil { 600 logger.Fatal("could not get pooling policy", zap.Error(err)) 601 } 602 603 tagEncoderPool := serialize.NewTagEncoderPool( 604 serialize.NewTagEncoderOptions(), 605 poolOptions( 606 policy.TagEncoderPool, 607 scope.SubScope("tag-encoder-pool"))) 608 tagEncoderPool.Init() 609 tagDecoderPool := serialize.NewTagDecoderPool( 610 serialize.NewTagDecoderOptions(serialize.TagDecoderOptionsConfig{}), 611 poolOptions( 612 policy.TagDecoderPool, 613 scope.SubScope("tag-decoder-pool"))) 614 tagDecoderPool.Init() 615 616 // Pass nil for block.LeaseVerifier for now and it will be set after the 617 // db is constructed (since the db is required to construct a 618 // block.LeaseVerifier). Initialized here because it needs to be propagated 619 // to both the DB and the blockRetriever. 
620 blockLeaseManager := block.NewLeaseManager(nil) 621 opts = opts.SetBlockLeaseManager(blockLeaseManager) 622 fsopts := fs.NewOptions(). 623 SetClockOptions(opts.ClockOptions()). 624 SetInstrumentOptions(opts.InstrumentOptions(). 625 SetMetricsScope(scope.SubScope("database.fs"))). 626 SetFilePathPrefix(cfg.Filesystem.FilePathPrefixOrDefault()). 627 SetNewFileMode(newFileMode). 628 SetNewDirectoryMode(newDirectoryMode). 629 SetWriterBufferSize(cfg.Filesystem.WriteBufferSizeOrDefault()). 630 SetDataReaderBufferSize(cfg.Filesystem.DataReadBufferSizeOrDefault()). 631 SetInfoReaderBufferSize(cfg.Filesystem.InfoReadBufferSizeOrDefault()). 632 SetSeekReaderBufferSize(cfg.Filesystem.SeekReadBufferSizeOrDefault()). 633 SetMmapEnableHugeTLB(shouldUseHugeTLB). 634 SetMmapHugeTLBThreshold(mmapCfg.HugeTLB.Threshold). 635 SetRuntimeOptionsManager(runtimeOptsMgr). 636 SetTagEncoderPool(tagEncoderPool). 637 SetTagDecoderPool(tagDecoderPool). 638 SetForceIndexSummariesMmapMemory(cfg.Filesystem.ForceIndexSummariesMmapMemoryOrDefault()). 639 SetForceBloomFilterMmapMemory(cfg.Filesystem.ForceBloomFilterMmapMemoryOrDefault()). 640 SetIndexBloomFilterFalsePositivePercent(cfg.Filesystem.BloomFilterFalsePositivePercentOrDefault()). 641 SetMmapReporter(mmapReporter) 642 643 var commitLogQueueSize int 644 cfgCommitLog := cfg.CommitLogOrDefault() 645 specified := cfgCommitLog.Queue.Size 646 switch cfgCommitLog.Queue.CalculationType { 647 case config.CalculationTypeFixed: 648 commitLogQueueSize = specified 649 case config.CalculationTypePerCPU: 650 commitLogQueueSize = specified * runtime.GOMAXPROCS(0) 651 default: 652 logger.Fatal("unknown commit log queue size type", 653 zap.Any("type", cfgCommitLog.Queue.CalculationType)) 654 } 655 656 var commitLogQueueChannelSize int 657 if cfgCommitLog.QueueChannel != nil { 658 specified := cfgCommitLog.QueueChannel.Size 659 switch cfgCommitLog.Queue.CalculationType { 660 case config.CalculationTypeFixed: 661 commitLogQueueChannelSize = specified 662 case config.CalculationTypePerCPU: 663 commitLogQueueChannelSize = specified * runtime.GOMAXPROCS(0) 664 default: 665 logger.Fatal("unknown commit log queue channel size type", 666 zap.Any("type", cfgCommitLog.Queue.CalculationType)) 667 } 668 } else { 669 commitLogQueueChannelSize = int(float64(commitLogQueueSize) / commitlog.MaximumQueueSizeQueueChannelSizeRatio) 670 } 671 672 // Set the series cache policy. 673 seriesCachePolicy := cfg.Cache.SeriesConfiguration().Policy 674 opts = opts.SetSeriesCachePolicy(seriesCachePolicy) 675 676 // Apply pooling options. 677 poolingPolicy, err := cfg.PoolingPolicyOrDefault() 678 if err != nil { 679 logger.Fatal("could not get pooling policy", zap.Error(err)) 680 } 681 682 opts = withEncodingAndPoolingOptions(cfg, logger, opts, poolingPolicy) 683 opts = opts.SetCommitLogOptions(opts.CommitLogOptions(). 684 SetInstrumentOptions(opts.InstrumentOptions()). 685 SetFilesystemOptions(fsopts). 686 SetStrategy(commitlog.StrategyWriteBehind). 687 SetFlushSize(cfgCommitLog.FlushMaxBytes). 688 SetFlushInterval(cfgCommitLog.FlushEvery). 689 SetBacklogQueueSize(commitLogQueueSize). 690 SetBacklogQueueChannelSize(commitLogQueueChannelSize)) 691 692 // Setup the block retriever 693 switch seriesCachePolicy { 694 case series.CacheAll: 695 // No options needed to be set 696 default: 697 // All other caching strategies require retrieving series from disk 698 // to service a cache miss 699 retrieverOpts := fs.NewBlockRetrieverOptions(). 700 SetBytesPool(opts.BytesPool()). 
701 SetRetrieveRequestPool(opts.RetrieveRequestPool()). 702 SetIdentifierPool(opts.IdentifierPool()). 703 SetBlockLeaseManager(blockLeaseManager). 704 SetQueryLimits(queryLimits) 705 if blockRetrieveCfg := cfg.BlockRetrieve; blockRetrieveCfg != nil { 706 if v := blockRetrieveCfg.FetchConcurrency; v != nil { 707 retrieverOpts = retrieverOpts.SetFetchConcurrency(*v) 708 } 709 if v := blockRetrieveCfg.CacheBlocksOnRetrieve; v != nil { 710 retrieverOpts = retrieverOpts.SetCacheBlocksOnRetrieve(*v) 711 } 712 } 713 blockRetrieverMgr := block.NewDatabaseBlockRetrieverManager( 714 func(md namespace.Metadata, shardSet sharding.ShardSet) (block.DatabaseBlockRetriever, error) { 715 retriever, err := fs.NewBlockRetriever(retrieverOpts, fsopts) 716 if err != nil { 717 return nil, err 718 } 719 if err := retriever.Open(md, shardSet); err != nil { 720 return nil, err 721 } 722 return retriever, nil 723 }) 724 opts = opts.SetDatabaseBlockRetrieverManager(blockRetrieverMgr) 725 } 726 727 // Set the persistence manager 728 pm, err := fs.NewPersistManager(fsopts) 729 if err != nil { 730 logger.Fatal("could not create persist manager", zap.Error(err)) 731 } 732 opts = opts.SetPersistManager(pm) 733 734 // Set the index claims manager 735 icm, err := fs.NewIndexClaimsManager(fsopts) 736 if err != nil { 737 logger.Fatal("could not create index claims manager", zap.Error(err)) 738 } 739 defer func() { 740 // Reset counter of index claims managers after server teardown. 741 fs.ResetIndexClaimsManagersUnsafe() 742 }() 743 opts = opts.SetIndexClaimsManager(icm) 744 745 if value := cfg.ForceColdWritesEnabled; value != nil { 746 // Allow forcing cold writes to be enabled by config. 747 opts = opts.SetForceColdWritesEnabled(*value) 748 } 749 750 forceColdWrites := opts.ForceColdWritesEnabled() 751 var envCfgResults environment.ConfigureResults 752 if len(envConfig.Statics) == 0 { 753 logger.Info("creating dynamic config service client with m3cluster") 754 755 envCfgResults, err = envConfig.Configure(environment.ConfigurationParameters{ 756 InterruptedCh: interruptOpts.InterruptedCh, 757 InstrumentOpts: iOpts, 758 HashingSeed: cfg.Hashing.Seed, 759 NewDirectoryMode: newDirectoryMode, 760 ForceColdWritesEnabled: forceColdWrites, 761 }) 762 if err != nil { 763 logger.Fatal("could not initialize dynamic config", zap.Error(err)) 764 } 765 } else { 766 logger.Info("creating static config service client with m3cluster") 767 768 envCfgResults, err = envConfig.Configure(environment.ConfigurationParameters{ 769 InterruptedCh: interruptOpts.InterruptedCh, 770 InstrumentOpts: iOpts, 771 HostID: hostID, 772 ForceColdWritesEnabled: forceColdWrites, 773 }) 774 if err != nil { 775 logger.Fatal("could not initialize static config", zap.Error(err)) 776 } 777 } 778 779 syncCfg, err := envCfgResults.SyncCluster() 780 if err != nil { 781 logger.Fatal("invalid cluster config", zap.Error(err)) 782 } 783 if runOpts.ClusterClientCh != nil { 784 runOpts.ClusterClientCh <- syncCfg.ClusterClient 785 } 786 if runOpts.KVStoreCh != nil { 787 runOpts.KVStoreCh <- syncCfg.KVStore 788 } 789 790 opts = opts.SetNamespaceInitializer(syncCfg.NamespaceInitializer) 791 792 // Set tchannelthrift options. 793 ttopts := tchannelthrift.NewOptions(). 794 SetClockOptions(opts.ClockOptions()). 795 SetInstrumentOptions(opts.InstrumentOptions()). 796 SetTopologyInitializer(syncCfg.TopologyInitializer). 797 SetIdentifierPool(opts.IdentifierPool()). 798 SetTagEncoderPool(tagEncoderPool). 799 SetCheckedBytesWrapperPool(opts.CheckedBytesWrapperPool()). 
800 SetMaxOutstandingWriteRequests(cfg.Limits.MaxOutstandingWriteRequests). 801 SetMaxOutstandingReadRequests(cfg.Limits.MaxOutstandingReadRequests). 802 SetQueryLimits(queryLimits). 803 SetPermitsOptions(opts.PermitsOptions()) 804 805 // Start servers before constructing the DB so orchestration tools can check health endpoints 806 // before topology is set. 807 var ( 808 contextPool = opts.ContextPool() 809 tchannelOpts = xtchannel.NewDefaultChannelOptions() 810 // Pass nil for the database argument because we haven't constructed it yet. We'll call 811 // SetDatabase() once we've initialized it. 812 service = ttnode.NewService(nil, ttopts) 813 ) 814 if cfg.TChannel != nil { 815 tchannelOpts.MaxIdleTime = cfg.TChannel.MaxIdleTime 816 tchannelOpts.IdleCheckInterval = cfg.TChannel.IdleCheckInterval 817 } 818 tchanOpts := ttnode.NewOptions(tchannelOpts). 819 SetInstrumentOptions(opts.InstrumentOptions()) 820 if fn := runOpts.StorageOptions.TChanChannelFn; fn != nil { 821 tchanOpts = tchanOpts.SetTChanChannelFn(fn) 822 } 823 if fn := runOpts.StorageOptions.TChanNodeServerFn; fn != nil { 824 tchanOpts = tchanOpts.SetTChanNodeServerFn(fn) 825 } 826 827 listenAddress := cfg.ListenAddressOrDefault() 828 tchannelthriftNodeClose, err := ttnode.NewServer(service, 829 listenAddress, contextPool, tchanOpts).ListenAndServe() 830 if err != nil { 831 logger.Fatal("could not open tchannelthrift interface", 832 zap.String("address", listenAddress), zap.Error(err)) 833 } 834 defer tchannelthriftNodeClose() 835 logger.Info("node tchannelthrift: listening", zap.String("address", listenAddress)) 836 837 httpListenAddress := cfg.HTTPNodeListenAddressOrDefault() 838 httpjsonNodeClose, err := hjnode.NewServer(service, 839 httpListenAddress, contextPool, nil).ListenAndServe() 840 if err != nil { 841 logger.Fatal("could not open httpjson interface", 842 zap.String("address", httpListenAddress), zap.Error(err)) 843 } 844 defer httpjsonNodeClose() 845 logger.Info("node httpjson: listening", zap.String("address", httpListenAddress)) 846 847 debugListenAddress := cfg.DebugListenAddressOrDefault() 848 if debugListenAddress != "" { 849 var debugWriter xdebug.ZipWriter 850 handlerOpts, err := placementhandler.NewHandlerOptions(syncCfg.ClusterClient, 851 placement.Configuration{}, nil, iOpts) 852 if err != nil { 853 logger.Warn("could not create handler options for debug writer", zap.Error(err)) 854 } else { 855 envCfgCluster, err := envConfig.Services.SyncCluster() 856 if err != nil || envCfgCluster.Service == nil { 857 logger.Warn("could not get cluster config for debug writer", 858 zap.Error(err), 859 zap.Bool("envCfgClusterServiceIsNil", envCfgCluster.Service == nil)) 860 } else { 861 debugWriter, err = extdebug.NewPlacementAndNamespaceZipWriterWithDefaultSources( 862 cpuProfileDuration, 863 syncCfg.ClusterClient, 864 handlerOpts, 865 []handleroptions.ServiceNameAndDefaults{ 866 { 867 ServiceName: handleroptions.M3DBServiceName, 868 Defaults: []handleroptions.ServiceOptionsDefault{ 869 handleroptions.WithDefaultServiceEnvironment(envCfgCluster.Service.Env), 870 handleroptions.WithDefaultServiceZone(envCfgCluster.Service.Zone), 871 }, 872 }, 873 }, 874 iOpts) 875 if err != nil { 876 logger.Error("unable to create debug writer", zap.Error(err)) 877 } 878 } 879 } 880 881 debugClose := startDebugServer(debugWriter, logger, debugListenAddress, defaultServeMux) 882 defer debugClose() 883 } 884 885 topo, err := syncCfg.TopologyInitializer.Init() 886 if err != nil { 887 var interruptErr *xos.InterruptError 888 if errors.As(err, 
&interruptErr) { 889 logger.Warn("interrupt received. closing server", zap.Error(err)) 890 // NB(nate): Have not attempted to start the actual database yet so 891 // it's safe for us to just return here. 892 return 893 } 894 895 logger.Fatal("could not initialize m3db topology", zap.Error(err)) 896 } 897 898 var protoEnabled bool 899 if cfg.Proto != nil && cfg.Proto.Enabled { 900 protoEnabled = true 901 } 902 schemaRegistry := namespace.NewSchemaRegistry(protoEnabled, logger) 903 // For application m3db client integration test convenience (where a local dbnode is started as a docker container), 904 // we allow loading user schema from local file into schema registry. 905 if protoEnabled { 906 for nsID, protoConfig := range cfg.Proto.SchemaRegistry { 907 dummyDeployID := "fromconfig" 908 if err := namespace.LoadSchemaRegistryFromFile(schemaRegistry, ident.StringID(nsID), 909 dummyDeployID, 910 protoConfig.SchemaFilePath, protoConfig.MessageName); err != nil { 911 logger.Fatal("could not load schema from configuration", zap.Error(err)) 912 } 913 } 914 } 915 916 origin := topology.NewHost(hostID, "") 917 m3dbClient, err := newAdminClient( 918 cfg.Client, opts.ClockOptions(), iOpts, tchannelOpts, syncCfg.TopologyInitializer, 919 runtimeOptsMgr, origin, protoEnabled, schemaRegistry, 920 syncCfg.KVStore, opts.ContextPool(), opts.BytesPool(), opts.IdentifierPool(), 921 logger, runOpts.CustomOptions) 922 if err != nil { 923 logger.Fatal("could not create m3db client", zap.Error(err)) 924 } 925 926 if runOpts.ClientCh != nil { 927 runOpts.ClientCh <- m3dbClient 928 } 929 930 documentsBuilderAlloc := index.NewBootstrapResultDocumentsBuilderAllocator( 931 opts.IndexOptions()) 932 rsOpts := result.NewOptions(). 933 SetClockOptions(opts.ClockOptions()). 934 SetInstrumentOptions(opts.InstrumentOptions()). 935 SetDatabaseBlockOptions(opts.DatabaseBlockOptions()). 936 SetSeriesCachePolicy(opts.SeriesCachePolicy()). 937 SetIndexDocumentsBuilderAllocator(documentsBuilderAlloc) 938 939 var repairClients []client.AdminClient 940 if cfg.Repair != nil && cfg.Repair.Enabled { 941 repairClients = append(repairClients, m3dbClient) 942 } 943 if cfg.Replication != nil { 944 for _, cluster := range cfg.Replication.Clusters { 945 if !cluster.RepairEnabled { 946 continue 947 } 948 949 // Pass nil for the topology initializer because we want to create 950 // a new one for the cluster we wish to replicate from, not use the 951 // same one as the cluster this node belongs to. 952 var topologyInitializer topology.Initializer 953 // Guaranteed to not be nil if repair is enabled by config validation. 954 clientCfg := *cluster.Client 955 clusterClient, err := newAdminClient( 956 clientCfg, opts.ClockOptions(), iOpts, tchannelOpts, topologyInitializer, 957 runtimeOptsMgr, origin, protoEnabled, schemaRegistry, 958 syncCfg.KVStore, opts.ContextPool(), opts.BytesPool(), 959 opts.IdentifierPool(), logger, runOpts.CustomOptions) 960 if err != nil { 961 logger.Fatal( 962 "unable to create client for replicated cluster", 963 zap.String("clusterName", cluster.Name), zap.Error(err)) 964 } 965 repairClients = append(repairClients, clusterClient) 966 } 967 } 968 repairEnabled := len(repairClients) > 0 969 if repairEnabled { 970 repairOpts := opts.RepairOptions(). 971 SetAdminClients(repairClients) 972 973 if repairCfg := cfg.Repair; repairCfg != nil { 974 repairOpts = repairOpts. 975 SetType(repairCfg.Type). 976 SetStrategy(repairCfg.Strategy). 977 SetForce(repairCfg.Force). 978 SetResultOptions(rsOpts). 
979 SetDebugShadowComparisonsEnabled(cfg.Repair.DebugShadowComparisonsEnabled) 980 if cfg.Repair.Throttle > 0 { 981 repairOpts = repairOpts.SetRepairThrottle(cfg.Repair.Throttle) 982 } 983 if cfg.Repair.CheckInterval > 0 { 984 repairOpts = repairOpts.SetRepairCheckInterval(cfg.Repair.CheckInterval) 985 } 986 if cfg.Repair.Concurrency > 0 { 987 repairOpts = repairOpts.SetRepairShardConcurrency(cfg.Repair.Concurrency) 988 } 989 990 if cfg.Repair.DebugShadowComparisonsPercentage > 0 { 991 // Set conditionally to avoid stomping on the default value of 1.0. 992 repairOpts = repairOpts.SetDebugShadowComparisonsPercentage(cfg.Repair.DebugShadowComparisonsPercentage) 993 } 994 } 995 996 opts = opts. 997 SetRepairEnabled(true). 998 SetRepairOptions(repairOpts) 999 } else { 1000 opts = opts.SetRepairEnabled(false) 1001 } 1002 1003 // Set bootstrap options - We need to create a topology map provider from the 1004 // same topology that will be passed to the cluster so that when we make 1005 // bootstrapping decisions they are in sync with the clustered database 1006 // which is triggering the actual bootstraps. This way, when the clustered 1007 // database receives a topology update and decides to kick off a bootstrap, 1008 // the bootstrap process will receive a topology map that is at least as 1009 // recent as the one that triggered the bootstrap, if not newer. 1010 // See GitHub issue #1013 for more details. 1011 topoMapProvider := newTopoMapProvider(topo) 1012 bs, err := cfg.Bootstrap.New( 1013 rsOpts, opts, topoMapProvider, origin, m3dbClient, 1014 ) 1015 if err != nil { 1016 logger.Fatal("could not create bootstrap process", zap.Error(err)) 1017 } 1018 opts = opts.SetBootstrapProcessProvider(bs) 1019 1020 // Start the cluster services now that the M3DB client is available. 1021 clusterListenAddress := cfg.ClusterListenAddressOrDefault() 1022 tchannelthriftClusterClose, err := ttcluster.NewServer(m3dbClient, 1023 clusterListenAddress, contextPool, tchannelOpts).ListenAndServe() 1024 if err != nil { 1025 logger.Fatal("could not open tchannelthrift interface", 1026 zap.String("address", clusterListenAddress), zap.Error(err)) 1027 } 1028 defer tchannelthriftClusterClose() 1029 logger.Info("cluster tchannelthrift: listening", zap.String("address", clusterListenAddress)) 1030 1031 httpClusterListenAddress := cfg.HTTPClusterListenAddressOrDefault() 1032 httpjsonClusterClose, err := hjcluster.NewServer(m3dbClient, 1033 httpClusterListenAddress, contextPool, nil).ListenAndServe() 1034 if err != nil { 1035 logger.Fatal("could not open httpjson interface", 1036 zap.String("address", httpClusterListenAddress), zap.Error(err)) 1037 } 1038 defer httpjsonClusterClose() 1039 logger.Info("cluster httpjson: listening", zap.String("address", httpClusterListenAddress)) 1040 1041 // Initialize clustered database. 1042 clusterTopoWatch, err := topo.Watch() 1043 if err != nil { 1044 logger.Fatal("could not create cluster topology watch", zap.Error(err)) 1045 } 1046 1047 opts = opts.SetSchemaRegistry(schemaRegistry). 1048 SetAdminClient(m3dbClient) 1049 1050 db, err := cluster.NewDatabase(hostID, topo, clusterTopoWatch, opts) 1051 if err != nil { 1052 logger.Fatal("could not construct database", zap.Error(err)) 1053 } 1054 1055 // Now that the database has been created it can be set as the block lease verifier 1056 // on the block lease manager.
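// (The manager was constructed earlier with a nil verifier precisely so it could be shared with the DB and the block retriever before the database existed; this completes that wiring.)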
1057 leaseVerifier := storage.NewLeaseVerifier(db) 1058 blockLeaseManager.SetLeaseVerifier(leaseVerifier) 1059 1060 if err := db.Open(); err != nil { 1061 logger.Fatal("could not open database", zap.Error(err)) 1062 } 1063 1064 // Now that we've initialized the database we can set it on the service. 1065 service.SetDatabase(db) 1066 1067 go func() { 1068 if runOpts.BootstrapCh != nil { 1069 // Notify on bootstrap chan if specified. 1070 defer func() { 1071 runOpts.BootstrapCh <- struct{}{} 1072 }() 1073 } 1074 1075 // Bootstrap asynchronously so we can handle interrupt. 1076 if err := db.Bootstrap(); err != nil { 1077 logger.Fatal("could not bootstrap database", zap.Error(err)) 1078 } 1079 logger.Info("bootstrapped") 1080 1081 // Only set the write new series limit after bootstrapping 1082 kvWatchNewSeriesLimitPerShard(syncCfg.KVStore, logger, topo, 1083 runtimeOptsMgr, cfg.Limits.WriteNewSeriesPerSecond) 1084 kvWatchEncodersPerBlockLimit(syncCfg.KVStore, logger, 1085 runtimeOptsMgr, cfg.Limits.MaxEncodersPerBlock) 1086 kvWatchQueryLimit(syncCfg.KVStore, logger, 1087 queryLimits.FetchDocsLimit(), 1088 queryLimits.BytesReadLimit(), 1089 // For backwards compatibility as M3 moves toward permits instead of time-based limits, 1090 // the series-read path uses permits which are implemented with limits, and so we still 1091 // support dynamic updates to this limit-based permit by passing the limit itself downstream. 1092 seriesReadPermits.Limit, 1093 queryLimits.AggregateDocsLimit(), 1094 limitOpts, 1095 ) 1096 }() 1097 1098 // Stop our async watch and now block waiting for the interrupt. 1099 intWatchCancel() 1100 select { 1101 case <-interruptOpts.InterruptedCh: 1102 logger.Warn("interrupt already received. closing") 1103 default: 1104 xos.WaitForInterrupt(logger, interruptOpts) 1105 } 1106 1107 // Attempt graceful server close. 1108 closedCh := make(chan struct{}) 1109 go func() { 1110 err := db.Terminate() 1111 if err != nil { 1112 logger.Error("close database error", zap.Error(err)) 1113 } 1114 closedCh <- struct{}{} 1115 }() 1116 1117 // Wait then close or hard close.
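// The graceful window is serverGracefulCloseTimeout (10 seconds, per the consts at the top of this file); if db.Terminate() has not finished by then, the server logs the timeout and exits anyway.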
1118 closeTimeout := serverGracefulCloseTimeout 1119 select { 1120 case <-closedCh: 1121 logger.Info("server closed") 1122 case <-time.After(closeTimeout): 1123 logger.Error("server closed after timeout", zap.Duration("timeout", closeTimeout)) 1124 } 1125 } 1126 1127 func startDebugServer( 1128 debugWriter xdebug.ZipWriter, 1129 logger *zap.Logger, 1130 debugListenAddress string, 1131 mux *http.ServeMux, 1132 ) func() { 1133 xdebug.RegisterPProfHandlers(mux) 1134 server := http.Server{Addr: debugListenAddress, Handler: mux} 1135 1136 if debugWriter != nil { 1137 if err := debugWriter.RegisterHandler(xdebug.DebugURL, mux); err != nil { 1138 logger.Error("unable to register debug writer endpoint", zap.Error(err)) 1139 } 1140 } 1141 1142 go func() { 1143 if err := server.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) { 1144 logger.Error("debug server could not listen", 1145 zap.String("address", debugListenAddress), zap.Error(err)) 1146 } 1147 }() 1148 1149 return func() { 1150 ctx, cancel := context.WithTimeout(context.Background(), debugServerGracefulCloseTimeout) 1151 defer cancel() 1152 if err := server.Shutdown(ctx); err != nil { 1153 logger.Warn("debug server failed to shutdown gracefully") 1154 } else { 1155 logger.Info("debug server closed") 1156 } 1157 } 1158 } 1159 1160 func bgValidateProcessLimits(logger *zap.Logger) { 1161 // If unable to validate process limits on the current configuration, 1162 // do not run background validator task. 1163 if canValidate, message := canValidateProcessLimits(); !canValidate { 1164 logger.Warn("cannot validate process limits: invalid configuration found", 1165 zap.String("message", message)) 1166 return 1167 } 1168 1169 start := time.Now() 1170 t := time.NewTicker(bgProcessLimitInterval) 1171 defer t.Stop() 1172 for { 1173 // only monitor for first `maxBgProcessLimitMonitorDuration` of process lifetime 1174 if time.Since(start) > maxBgProcessLimitMonitorDuration { 1175 return 1176 } 1177 1178 err := validateProcessLimits() 1179 if err == nil { 1180 return 1181 } 1182 1183 logger.Warn("invalid configuration found, refer to linked documentation for more information", 1184 zap.String("url", xdocs.Path("operational_guide/kernel_configuration")), 1185 zap.Error(err), 1186 ) 1187 1188 <-t.C 1189 } 1190 } 1191 1192 func kvWatchNewSeriesLimitPerShard( 1193 store kv.Store, 1194 logger *zap.Logger, 1195 topo topology.Topology, 1196 runtimeOptsMgr m3dbruntime.OptionsManager, 1197 defaultClusterNewSeriesLimit int, 1198 ) { 1199 var initClusterLimit int 1200 1201 value, err := store.Get(kvconfig.ClusterNewSeriesInsertLimitKey) 1202 if err == nil { 1203 protoValue := &commonpb.Int64Proto{} 1204 err = value.Unmarshal(protoValue) 1205 if err == nil { 1206 initClusterLimit = int(protoValue.Value) 1207 } 1208 } 1209 1210 if err != nil { 1211 if err != kv.ErrNotFound { 1212 logger.Warn("error resolving cluster new series insert limit", zap.Error(err)) 1213 } 1214 initClusterLimit = defaultClusterNewSeriesLimit 1215 } 1216 1217 err = setNewSeriesLimitPerShardOnChange(topo, runtimeOptsMgr, initClusterLimit) 1218 if err != nil { 1219 logger.Warn("unable to set cluster new series insert limit", zap.Error(err)) 1220 } 1221 1222 watch, err := store.Watch(kvconfig.ClusterNewSeriesInsertLimitKey) 1223 if err != nil { 1224 logger.Error("could not watch cluster new series insert limit", zap.Error(err)) 1225 return 1226 } 1227 1228 go func() { 1229 protoValue := &commonpb.Int64Proto{} 1230 for range watch.C() { 1231 value := defaultClusterNewSeriesLimit 1232 if 
newValue := watch.Get(); newValue != nil { 1233 if err := newValue.Unmarshal(protoValue); err != nil { 1234 logger.Warn("unable to parse new cluster new series insert limit", zap.Error(err)) 1235 continue 1236 } 1237 value = int(protoValue.Value) 1238 } 1239 1240 err = setNewSeriesLimitPerShardOnChange(topo, runtimeOptsMgr, value) 1241 if err != nil { 1242 logger.Warn("unable to set cluster new series insert limit", zap.Error(err)) 1243 continue 1244 } 1245 } 1246 }() 1247 } 1248 1249 func kvWatchEncodersPerBlockLimit( 1250 store kv.Store, 1251 logger *zap.Logger, 1252 runtimeOptsMgr m3dbruntime.OptionsManager, 1253 defaultEncodersPerBlockLimit int, 1254 ) { 1255 var initEncoderLimit int 1256 1257 value, err := store.Get(kvconfig.EncodersPerBlockLimitKey) 1258 if err == nil { 1259 protoValue := &commonpb.Int64Proto{} 1260 err = value.Unmarshal(protoValue) 1261 if err == nil { 1262 initEncoderLimit = int(protoValue.Value) 1263 } 1264 } 1265 1266 if err != nil { 1267 if err != kv.ErrNotFound { 1268 logger.Warn("error resolving encoder per block limit", zap.Error(err)) 1269 } 1270 initEncoderLimit = defaultEncodersPerBlockLimit 1271 } 1272 1273 err = setEncodersPerBlockLimitOnChange(runtimeOptsMgr, initEncoderLimit) 1274 if err != nil { 1275 logger.Warn("unable to set encoder per block limit", zap.Error(err)) 1276 } 1277 1278 watch, err := store.Watch(kvconfig.EncodersPerBlockLimitKey) 1279 if err != nil { 1280 logger.Error("could not watch encoder per block limit", zap.Error(err)) 1281 return 1282 } 1283 1284 go func() { 1285 protoValue := &commonpb.Int64Proto{} 1286 for range watch.C() { 1287 value := defaultEncodersPerBlockLimit 1288 if newValue := watch.Get(); newValue != nil { 1289 if err := newValue.Unmarshal(protoValue); err != nil { 1290 logger.Warn("unable to parse new encoder per block limit", zap.Error(err)) 1291 continue 1292 } 1293 value = int(protoValue.Value) 1294 } 1295 1296 err = setEncodersPerBlockLimitOnChange(runtimeOptsMgr, value) 1297 if err != nil { 1298 logger.Warn("unable to set encoder per block limit", zap.Error(err)) 1299 continue 1300 } 1301 } 1302 }() 1303 } 1304 1305 func kvWatchQueryLimit( 1306 store kv.Store, 1307 logger *zap.Logger, 1308 docsLimit limits.LookbackLimit, 1309 bytesReadLimit limits.LookbackLimit, 1310 diskSeriesReadLimit limits.LookbackLimit, 1311 aggregateDocsLimit limits.LookbackLimit, 1312 defaultOpts limits.Options, 1313 ) { 1314 value, err := store.Get(kvconfig.QueryLimits) 1315 if err == nil { 1316 dynamicLimits := &kvpb.QueryLimits{} 1317 err = value.Unmarshal(dynamicLimits) 1318 if err == nil { 1319 updateQueryLimits( 1320 logger, docsLimit, bytesReadLimit, diskSeriesReadLimit, 1321 aggregateDocsLimit, dynamicLimits, defaultOpts) 1322 } 1323 } else if !errors.Is(err, kv.ErrNotFound) { 1324 logger.Warn("error resolving query limit", zap.Error(err)) 1325 } 1326 1327 watch, err := store.Watch(kvconfig.QueryLimits) 1328 if err != nil { 1329 logger.Error("could not watch query limit", zap.Error(err)) 1330 return 1331 } 1332 1333 go func() { 1334 dynamicLimits := &kvpb.QueryLimits{} 1335 for range watch.C() { 1336 if newValue := watch.Get(); newValue != nil { 1337 if err := newValue.Unmarshal(dynamicLimits); err != nil { 1338 logger.Warn("unable to parse new query limits", zap.Error(err)) 1339 continue 1340 } 1341 updateQueryLimits( 1342 logger, docsLimit, bytesReadLimit, diskSeriesReadLimit, 1343 aggregateDocsLimit, dynamicLimits, defaultOpts) 1344 } 1345 } 1346 }() 1347 } 1348 1349 func updateQueryLimits( 1350 logger *zap.Logger, 1351 
docsLimit limits.LookbackLimit, 1352 bytesReadLimit limits.LookbackLimit, 1353 diskSeriesReadLimit limits.LookbackLimit, 1354 aggregateDocsLimit limits.LookbackLimit, 1355 dynamicOpts *kvpb.QueryLimits, 1356 configOpts limits.Options, 1357 ) { 1358 var ( 1359 // Default to the config-based limits if unset in dynamic limits. 1360 // Otherwise, use the dynamic limit. 1361 docsLimitOpts = configOpts.DocsLimitOpts() 1362 bytesReadLimitOpts = configOpts.BytesReadLimitOpts() 1363 diskSeriesReadLimitOpts = configOpts.DiskSeriesReadLimitOpts() 1364 aggDocsLimitOpts = configOpts.AggregateDocsLimitOpts() 1365 ) 1366 if dynamicOpts != nil { 1367 if dynamicOpts.MaxRecentlyQueriedSeriesBlocks != nil { 1368 docsLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesBlocks) 1369 } 1370 if dynamicOpts.MaxRecentlyQueriedSeriesDiskBytesRead != nil { 1371 bytesReadLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesDiskBytesRead) 1372 } 1373 if dynamicOpts.MaxRecentlyQueriedSeriesDiskRead != nil { 1374 diskSeriesReadLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedSeriesDiskRead) 1375 } 1376 if dynamicOpts.MaxRecentlyQueriedMetadataRead != nil { 1377 aggDocsLimitOpts = dynamicLimitToLimitOpts(dynamicOpts.MaxRecentlyQueriedMetadataRead) 1378 } 1379 } 1380 1381 if err := updateQueryLimit(docsLimit, docsLimitOpts); err != nil { 1382 logger.Error("error updating docs limit", zap.Error(err)) 1383 } 1384 1385 if err := updateQueryLimit(bytesReadLimit, bytesReadLimitOpts); err != nil { 1386 logger.Error("error updating bytes read limit", zap.Error(err)) 1387 } 1388 1389 if err := updateQueryLimit(diskSeriesReadLimit, diskSeriesReadLimitOpts); err != nil { 1390 logger.Error("error updating series read limit", zap.Error(err)) 1391 } 1392 1393 if err := updateQueryLimit(aggregateDocsLimit, aggDocsLimitOpts); err != nil { 1394 logger.Error("error updating metadata read limit", zap.Error(err)) 1395 } 1396 } 1397 1398 func updateQueryLimit( 1399 limit limits.LookbackLimit, 1400 newOpts limits.LookbackLimitOptions, 1401 ) error { 1402 old := limit.Options() 1403 if old.Equals(newOpts) { 1404 return nil 1405 } 1406 1407 return limit.Update(newOpts) 1408 } 1409 1410 func dynamicLimitToLimitOpts(dynamicLimit *kvpb.QueryLimit) limits.LookbackLimitOptions { 1411 return limits.LookbackLimitOptions{ 1412 Limit: dynamicLimit.Limit, 1413 Lookback: time.Duration(dynamicLimit.LookbackSeconds) * time.Second, 1414 ForceExceeded: dynamicLimit.ForceExceeded, 1415 ForceWaited: dynamicLimit.ForceWaited, 1416 } 1417 } 1418 1419 func kvWatchClientConsistencyLevels( 1420 store kv.Store, 1421 logger *zap.Logger, 1422 clientOpts client.AdminOptions, 1423 runtimeOptsMgr m3dbruntime.OptionsManager, 1424 ) { 1425 setReadConsistencyLevel := func( 1426 v string, 1427 applyFn func(topology.ReadConsistencyLevel, m3dbruntime.Options) m3dbruntime.Options, 1428 ) error { 1429 for _, level := range topology.ValidReadConsistencyLevels() { 1430 if level.String() == v { 1431 runtimeOpts := applyFn(level, runtimeOptsMgr.Get()) 1432 return runtimeOptsMgr.Update(runtimeOpts) 1433 } 1434 } 1435 return fmt.Errorf("invalid read consistency level set: %s", v) 1436 } 1437 1438 setConsistencyLevel := func( 1439 v string, 1440 applyFn func(topology.ConsistencyLevel, m3dbruntime.Options) m3dbruntime.Options, 1441 ) error { 1442 for _, level := range topology.ValidConsistencyLevels() { 1443 if level.String() == v { 1444 runtimeOpts := applyFn(level, runtimeOptsMgr.Get()) 1445 return 
runtimeOptsMgr.Update(runtimeOpts) 1446 } 1447 } 1448 return fmt.Errorf("invalid consistency level set: %s", v) 1449 } 1450 1451 kvWatchStringValue(store, logger, 1452 kvconfig.ClientBootstrapConsistencyLevel, 1453 func(value string) error { 1454 return setReadConsistencyLevel(value, 1455 func(level topology.ReadConsistencyLevel, opts m3dbruntime.Options) m3dbruntime.Options { 1456 return opts.SetClientBootstrapConsistencyLevel(level) 1457 }) 1458 }, 1459 func() error { 1460 return runtimeOptsMgr.Update(runtimeOptsMgr.Get(). 1461 SetClientBootstrapConsistencyLevel(clientOpts.BootstrapConsistencyLevel())) 1462 }) 1463 1464 kvWatchStringValue(store, logger, 1465 kvconfig.ClientReadConsistencyLevel, 1466 func(value string) error { 1467 return setReadConsistencyLevel(value, 1468 func(level topology.ReadConsistencyLevel, opts m3dbruntime.Options) m3dbruntime.Options { 1469 return opts.SetClientReadConsistencyLevel(level) 1470 }) 1471 }, 1472 func() error { 1473 return runtimeOptsMgr.Update(runtimeOptsMgr.Get(). 1474 SetClientReadConsistencyLevel(clientOpts.ReadConsistencyLevel())) 1475 }) 1476 1477 kvWatchStringValue(store, logger, 1478 kvconfig.ClientWriteConsistencyLevel, 1479 func(value string) error { 1480 return setConsistencyLevel(value, 1481 func(level topology.ConsistencyLevel, opts m3dbruntime.Options) m3dbruntime.Options { 1482 return opts.SetClientWriteConsistencyLevel(level) 1483 }) 1484 }, 1485 func() error { 1486 return runtimeOptsMgr.Update(runtimeOptsMgr.Get(). 1487 SetClientWriteConsistencyLevel(clientOpts.WriteConsistencyLevel())) 1488 }) 1489 } 1490 1491 func kvWatchStringValue( 1492 store kv.Store, 1493 logger *zap.Logger, 1494 key string, 1495 onValue func(value string) error, 1496 onDelete func() error, 1497 ) { 1498 protoValue := &commonpb.StringProto{} 1499 1500 // First try to eagerly set the value so it doesn't flap if the 1501 // watch returns but not immediately for an existing value 1502 value, err := store.Get(key) 1503 if err != nil && err != kv.ErrNotFound { 1504 logger.Error("could not resolve KV", zap.String("key", key), zap.Error(err)) 1505 } 1506 if err == nil { 1507 if err := value.Unmarshal(protoValue); err != nil { 1508 logger.Error("could not unmarshal KV key", zap.String("key", key), zap.Error(err)) 1509 } else if err := onValue(protoValue.Value); err != nil { 1510 logger.Error("could not process value of KV", zap.String("key", key), zap.Error(err)) 1511 } else { 1512 logger.Info("set KV key", zap.String("key", key), zap.Any("value", protoValue.Value)) 1513 } 1514 } 1515 1516 watch, err := store.Watch(key) 1517 if err != nil { 1518 logger.Error("could not watch KV key", zap.String("key", key), zap.Error(err)) 1519 return 1520 } 1521 1522 go func() { 1523 for range watch.C() { 1524 newValue := watch.Get() 1525 if newValue == nil { 1526 if err := onDelete(); err != nil { 1527 logger.Warn("could not set default for KV key", zap.String("key", key), zap.Error(err)) 1528 } 1529 continue 1530 } 1531 1532 err := newValue.Unmarshal(protoValue) 1533 if err != nil { 1534 logger.Warn("could not unmarshal KV key", zap.String("key", key), zap.Error(err)) 1535 continue 1536 } 1537 if err := onValue(protoValue.Value); err != nil { 1538 logger.Warn("could not process change for KV key", zap.String("key", key), zap.Error(err)) 1539 continue 1540 } 1541 logger.Info("set KV key", zap.String("key", key), zap.Any("value", protoValue.Value)) 1542 } 1543 }() 1544 } 1545 1546 func setNewSeriesLimitPerShardOnChange( 1547 topo topology.Topology, 1548 runtimeOptsMgr 
m3dbruntime.OptionsManager, 1549 clusterLimit int, 1550 ) error { 1551 perPlacedShardLimit := clusterLimitToPlacedShardLimit(topo, clusterLimit) 1552 runtimeOpts := runtimeOptsMgr.Get() 1553 if runtimeOpts.WriteNewSeriesLimitPerShardPerSecond() == perPlacedShardLimit { 1554 // Not changed, no need to set the value and trigger a runtime options update 1555 return nil 1556 } 1557 1558 newRuntimeOpts := runtimeOpts. 1559 SetWriteNewSeriesLimitPerShardPerSecond(perPlacedShardLimit) 1560 return runtimeOptsMgr.Update(newRuntimeOpts) 1561 } 1562 1563 func clusterLimitToPlacedShardLimit(topo topology.Topology, clusterLimit int) int { 1564 if clusterLimit < 1 { 1565 return 0 1566 } 1567 topoMap := topo.Get() 1568 numShards := len(topoMap.ShardSet().AllIDs()) 1569 numPlacedShards := numShards * topoMap.Replicas() 1570 if numPlacedShards < 1 { 1571 return 0 1572 } 1573 nodeLimit := int(math.Ceil( 1574 float64(clusterLimit) / float64(numPlacedShards))) 1575 return nodeLimit 1576 } 1577 1578 func setEncodersPerBlockLimitOnChange( 1579 runtimeOptsMgr m3dbruntime.OptionsManager, 1580 encoderLimit int, 1581 ) error { 1582 runtimeOpts := runtimeOptsMgr.Get() 1583 if runtimeOpts.EncodersPerBlockLimit() == encoderLimit { 1584 // Not changed, no need to set the value and trigger a runtime options update 1585 return nil 1586 } 1587 1588 newRuntimeOpts := runtimeOpts. 1589 SetEncodersPerBlockLimit(encoderLimit) 1590 return runtimeOptsMgr.Update(newRuntimeOpts) 1591 } 1592 1593 func withEncodingAndPoolingOptions( 1594 cfg config.DBConfiguration, 1595 logger *zap.Logger, 1596 opts storage.Options, 1597 policy config.PoolingPolicy, 1598 ) storage.Options { 1599 iOpts := opts.InstrumentOptions() 1600 scope := opts.InstrumentOptions().MetricsScope() 1601 1602 // Set the byte slice capacities for the thrift pooling. 1603 thriftBytesAllocSizes := policy.ThriftBytesPoolAllocSizesOrDefault() 1604 logger.Info("set thrift bytes pool slice sizes", 1605 zap.Ints("sizes", thriftBytesAllocSizes)) 1606 apachethrift.SetMaxBytesPoolAlloc(thriftBytesAllocSizes...) 1607 1608 bytesPoolOpts := pool.NewObjectPoolOptions(). 1609 SetInstrumentOptions(iOpts.SetMetricsScope(scope.SubScope("bytes-pool"))) 1610 checkedBytesPoolOpts := bytesPoolOpts. 1611 SetInstrumentOptions(iOpts.SetMetricsScope(scope.SubScope("checked-bytes-pool"))) 1612 1613 buckets := make([]pool.Bucket, len(policy.BytesPool.Buckets)) 1614 for i, bucket := range policy.BytesPool.Buckets { 1615 var b pool.Bucket 1616 b.Capacity = bucket.CapacityOrDefault() 1617 b.Count = bucket.SizeOrDefault() 1618 b.Options = bytesPoolOpts. 1619 SetRefillLowWatermark(bucket.RefillLowWaterMarkOrDefault()). 
1620 SetRefillHighWatermark(bucket.RefillHighWaterMarkOrDefault()) 1621 buckets[i] = b 1622 1623 logger.Info("bytes pool configured", 1624 zap.Int("capacity", bucket.CapacityOrDefault()), 1625 zap.Int("size", int(bucket.SizeOrDefault())), 1626 zap.Float64("refillLowWaterMark", bucket.RefillLowWaterMarkOrDefault()), 1627 zap.Float64("refillHighWaterMark", bucket.RefillHighWaterMarkOrDefault())) 1628 } 1629 1630 var bytesPool pool.CheckedBytesPool 1631 switch policy.TypeOrDefault() { 1632 case config.SimplePooling: 1633 bytesPool = pool.NewCheckedBytesPool( 1634 buckets, 1635 checkedBytesPoolOpts, 1636 func(s []pool.Bucket) pool.BytesPool { 1637 return pool.NewBytesPool(s, bytesPoolOpts) 1638 }) 1639 default: 1640 logger.Fatal("unrecognized pooling type", zap.Any("type", policy.Type)) 1641 } 1642 1643 { 1644 // Avoid polluting the rest of the function with `l` var 1645 l := logger 1646 if t := policy.Type; t != nil { 1647 l = l.With(zap.String("policy", string(*t))) 1648 } 1649 1650 l.Info("bytes pool init start") 1651 bytesPool.Init() 1652 l.Info("bytes pool init end") 1653 } 1654 1655 segmentReaderPool := xio.NewSegmentReaderPool( 1656 poolOptions( 1657 policy.SegmentReaderPool, 1658 scope.SubScope("segment-reader-pool"))) 1659 segmentReaderPool.Init() 1660 1661 encoderPool := encoding.NewEncoderPool( 1662 poolOptions( 1663 policy.EncoderPool, 1664 scope.SubScope("encoder-pool"))) 1665 1666 closersPoolOpts := poolOptions( 1667 policy.ClosersPool, 1668 scope.SubScope("closers-pool")) 1669 1670 contextPoolOpts := poolOptions( 1671 policy.ContextPool, 1672 scope.SubScope("context-pool")) 1673 1674 contextPool := xcontext.NewPool(xcontext.NewOptions(). 1675 SetContextPoolOptions(contextPoolOpts). 1676 SetFinalizerPoolOptions(closersPoolOpts)) 1677 1678 iteratorPool := encoding.NewReaderIteratorPool( 1679 poolOptions( 1680 policy.IteratorPool, 1681 scope.SubScope("iterator-pool"))) 1682 1683 multiIteratorPool := encoding.NewMultiReaderIteratorPool( 1684 poolOptions( 1685 policy.IteratorPool, 1686 scope.SubScope("multi-iterator-pool"))) 1687 1688 writeBatchPoolInitialBatchSize := 0 1689 if policy.WriteBatchPool.InitialBatchSize != nil { 1690 // Use config value if available. 1691 writeBatchPoolInitialBatchSize = *policy.WriteBatchPool.InitialBatchSize 1692 } 1693 1694 var writeBatchPoolMaxBatchSize *int 1695 if policy.WriteBatchPool.MaxBatchSize != nil { 1696 writeBatchPoolMaxBatchSize = policy.WriteBatchPool.MaxBatchSize 1697 } 1698 1699 var writeBatchPoolSize int 1700 if policy.WriteBatchPool.Size != nil { 1701 writeBatchPoolSize = *policy.WriteBatchPool.Size 1702 } else { 1703 // If no value is set, calculate a reasonable value based on the commit log 1704 // queue size. We base it off the commit log queue size because we want 1705 // to be able to buffer at least one full commit log queue's worth of 1706 // writes without allocating, since these objects are very expensive to 1707 // allocate. 1708 commitlogQueueSize := opts.CommitLogOptions().BacklogQueueSize() 1709 expectedBatchSize := writeBatchPoolInitialBatchSize 1710 if expectedBatchSize == 0 { 1711 expectedBatchSize = client.DefaultWriteBatchSize 1712 } 1713 writeBatchPoolSize = commitlogQueueSize / expectedBatchSize 1714 } 1715 1716 writeBatchPoolOpts := pool.NewObjectPoolOptions() 1717 writeBatchPoolOpts = writeBatchPoolOpts. 1718 SetSize(writeBatchPoolSize). 1719 // Set watermarks to zero because this pool is sized to be as large as we 1720 // ever need it to be, so background allocations are usually wasteful.
1721 SetRefillLowWatermark(0.0). 1722 SetRefillHighWatermark(0.0). 1723 SetInstrumentOptions( 1724 writeBatchPoolOpts. 1725 InstrumentOptions(). 1726 SetMetricsScope(scope.SubScope("write-batch-pool"))) 1727 1728 writeBatchPool := writes.NewWriteBatchPool( 1729 writeBatchPoolOpts, 1730 writeBatchPoolInitialBatchSize, 1731 writeBatchPoolMaxBatchSize) 1732 1733 tagPoolPolicy := policy.TagsPool 1734 identifierPool := ident.NewPool(bytesPool, ident.PoolOptions{ 1735 IDPoolOptions: poolOptions( 1736 policy.IdentifierPool, scope.SubScope("identifier-pool")), 1737 TagsPoolOptions: maxCapacityPoolOptions(tagPoolPolicy, scope.SubScope("tags-pool")), 1738 TagsCapacity: tagPoolPolicy.CapacityOrDefault(), 1739 TagsMaxCapacity: tagPoolPolicy.MaxCapacityOrDefault(), 1740 TagsIteratorPoolOptions: poolOptions( 1741 policy.TagsIteratorPool, 1742 scope.SubScope("tags-iterator-pool")), 1743 }) 1744 1745 fetchBlockMetadataResultsPoolPolicy := policy.FetchBlockMetadataResultsPool 1746 fetchBlockMetadataResultsPool := block.NewFetchBlockMetadataResultsPool( 1747 capacityPoolOptions( 1748 fetchBlockMetadataResultsPoolPolicy, 1749 scope.SubScope("fetch-block-metadata-results-pool")), 1750 fetchBlockMetadataResultsPoolPolicy.CapacityOrDefault()) 1751 1752 fetchBlocksMetadataResultsPoolPolicy := policy.FetchBlocksMetadataResultsPool 1753 fetchBlocksMetadataResultsPool := block.NewFetchBlocksMetadataResultsPool( 1754 capacityPoolOptions( 1755 fetchBlocksMetadataResultsPoolPolicy, 1756 scope.SubScope("fetch-blocks-metadata-results-pool")), 1757 fetchBlocksMetadataResultsPoolPolicy.CapacityOrDefault()) 1758 1759 bytesWrapperPoolOpts := poolOptions( 1760 policy.CheckedBytesWrapperPool, 1761 scope.SubScope("checked-bytes-wrapper-pool")) 1762 bytesWrapperPool := xpool.NewCheckedBytesWrapperPool( 1763 bytesWrapperPoolOpts) 1764 bytesWrapperPool.Init() 1765 1766 encodingOpts := encoding.NewOptions(). 1767 SetEncoderPool(encoderPool). 1768 SetReaderIteratorPool(iteratorPool). 1769 SetBytesPool(bytesPool). 1770 SetSegmentReaderPool(segmentReaderPool). 1771 SetCheckedBytesWrapperPool(bytesWrapperPool). 1772 SetMetrics(encoding.NewMetrics(scope)) 1773 1774 encoderPool.Init(func() encoding.Encoder { 1775 if cfg.Proto != nil && cfg.Proto.Enabled { 1776 enc := proto.NewEncoder(0, encodingOpts) 1777 return enc 1778 } 1779 1780 return m3tsz.NewEncoder(0, nil, m3tsz.DefaultIntOptimizationEnabled, encodingOpts) 1781 }) 1782 1783 iteratorPool.Init(func(r xio.Reader64, descr namespace.SchemaDescr) encoding.ReaderIterator { 1784 if cfg.Proto != nil && cfg.Proto.Enabled { 1785 return proto.NewIterator(r, descr, encodingOpts) 1786 } 1787 return m3tsz.NewReaderIterator(r, m3tsz.DefaultIntOptimizationEnabled, encodingOpts) 1788 }) 1789 1790 multiIteratorPool.Init(func(r xio.Reader64, descr namespace.SchemaDescr) encoding.ReaderIterator { 1791 iter := iteratorPool.Get() 1792 iter.Reset(r, descr) 1793 return iter 1794 }) 1795 1796 writeBatchPool.Init() 1797 1798 bucketPool := series.NewBufferBucketPool( 1799 poolOptions(policy.BufferBucketPool, scope.SubScope("buffer-bucket-pool"))) 1800 bucketVersionsPool := series.NewBufferBucketVersionsPool( 1801 poolOptions(policy.BufferBucketVersionsPool, scope.SubScope("buffer-bucket-versions-pool"))) 1802 1803 retrieveRequestPool := fs.NewRetrieveRequestPool(segmentReaderPool, 1804 poolOptions(policy.RetrieveRequestPool, scope.SubScope("retrieve-request-pool"))) 1805 retrieveRequestPool.Init() 1806 1807 opts = opts. 1808 SetBytesPool(bytesPool). 1809 SetContextPool(contextPool). 
1810 SetEncoderPool(encoderPool). 1811 SetReaderIteratorPool(iteratorPool). 1812 SetMultiReaderIteratorPool(multiIteratorPool). 1813 SetIdentifierPool(identifierPool). 1814 SetFetchBlockMetadataResultsPool(fetchBlockMetadataResultsPool). 1815 SetFetchBlocksMetadataResultsPool(fetchBlocksMetadataResultsPool). 1816 SetWriteBatchPool(writeBatchPool). 1817 SetBufferBucketPool(bucketPool). 1818 SetBufferBucketVersionsPool(bucketVersionsPool). 1819 SetRetrieveRequestPool(retrieveRequestPool). 1820 SetCheckedBytesWrapperPool(bytesWrapperPool) 1821 1822 blockOpts := opts.DatabaseBlockOptions(). 1823 SetDatabaseBlockAllocSize(policy.BlockAllocSizeOrDefault()). 1824 SetContextPool(contextPool). 1825 SetEncoderPool(encoderPool). 1826 SetReaderIteratorPool(iteratorPool). 1827 SetMultiReaderIteratorPool(multiIteratorPool). 1828 SetSegmentReaderPool(segmentReaderPool). 1829 SetBytesPool(bytesPool) 1830 1831 if opts.SeriesCachePolicy() == series.CacheLRU { 1832 var ( 1833 runtimeOpts = opts.RuntimeOptionsManager() 1834 wiredListOpts = block.WiredListOptions{ 1835 RuntimeOptionsManager: runtimeOpts, 1836 InstrumentOptions: iOpts, 1837 ClockOptions: opts.ClockOptions(), 1838 } 1839 lruCfg = cfg.Cache.SeriesConfiguration().LRU 1840 ) 1841 1842 if lruCfg != nil && lruCfg.EventsChannelSize > 0 { 1843 wiredListOpts.EventsChannelSize = int(lruCfg.EventsChannelSize) 1844 } 1845 wiredList := block.NewWiredList(wiredListOpts) 1846 blockOpts = blockOpts.SetWiredList(wiredList) 1847 } 1848 blockPool := block.NewDatabaseBlockPool( 1849 poolOptions( 1850 policy.BlockPool, 1851 scope.SubScope("block-pool"))) 1852 blockPool.Init(func() block.DatabaseBlock { 1853 return block.NewDatabaseBlock(0, 0, ts.Segment{}, blockOpts, namespace.Context{}) 1854 }) 1855 blockOpts = blockOpts.SetDatabaseBlockPool(blockPool) 1856 opts = opts.SetDatabaseBlockOptions(blockOpts) 1857 1858 // NB(prateek): retention opts are overridden per namespace during series creation 1859 retentionOpts := retention.NewOptions() 1860 seriesOpts := storage.NewSeriesOptionsFromOptions(opts, retentionOpts). 1861 SetFetchBlockMetadataResultsPool(opts.FetchBlockMetadataResultsPool()) 1862 seriesPool := series.NewDatabaseSeriesPool( 1863 poolOptions( 1864 policy.SeriesPool, 1865 scope.SubScope("series-pool"))) 1866 1867 opts = opts. 1868 SetSeriesOptions(seriesOpts). 1869 SetDatabaseSeriesPool(seriesPool) 1870 opts = opts.SetCommitLogOptions(opts.CommitLogOptions(). 1871 SetBytesPool(bytesPool). 1872 SetIdentifierPool(identifierPool)) 1873 1874 postingsListOpts := poolOptions(policy.PostingsListPool, scope.SubScope("postingslist-pool")) 1875 postingsList := postings.NewPool(postingsListOpts, roaring.NewPostingsList) 1876 1877 queryResultsPool := index.NewQueryResultsPool( 1878 poolOptions(policy.IndexResultsPool, scope.SubScope("index-query-results-pool"))) 1879 aggregateQueryResultsPool := index.NewAggregateResultsPool( 1880 poolOptions(policy.IndexResultsPool, scope.SubScope("index-aggregate-results-pool"))) 1881 aggregateQueryValuesPool := index.NewAggregateValuesPool( 1882 poolOptions(policy.IndexResultsPool, scope.SubScope("index-aggregate-values-pool"))) 1883 1884 // Set value transformation options. 1885 opts = opts.SetTruncateType(cfg.Transforms.TruncateBy) 1886 forcedValue := cfg.Transforms.ForcedValue 1887 if forcedValue != nil { 1888 opts = opts.SetWriteTransformOptions(series.WriteTransformOptions{ 1889 ForceValueEnabled: true, 1890 ForceValue: *forcedValue, 1891 }) 1892 } 1893 1894 // Set index options. 1895 indexOpts := opts.IndexOptions(). 
1896 SetInstrumentOptions(iOpts). 1897 SetMemSegmentOptions( 1898 opts.IndexOptions().MemSegmentOptions(). 1899 SetPostingsListPool(postingsList). 1900 SetInstrumentOptions(iOpts)). 1901 SetFSTSegmentOptions( 1902 opts.IndexOptions().FSTSegmentOptions(). 1903 SetPostingsListPool(postingsList). 1904 SetInstrumentOptions(iOpts). 1905 SetContextPool(opts.ContextPool())). 1906 SetSegmentBuilderOptions( 1907 opts.IndexOptions().SegmentBuilderOptions(). 1908 SetPostingsListPool(postingsList)). 1909 SetIdentifierPool(identifierPool). 1910 SetCheckedBytesPool(bytesPool). 1911 SetQueryResultsPool(queryResultsPool). 1912 SetAggregateResultsPool(aggregateQueryResultsPool). 1913 SetAggregateValuesPool(aggregateQueryValuesPool). 1914 SetForwardIndexProbability(cfg.Index.ForwardIndexProbability). 1915 SetForwardIndexThreshold(cfg.Index.ForwardIndexThreshold) 1916 1917 queryResultsPool.Init(func() index.QueryResults { 1918 // NB(r): Need to initialize after setting the index opts so 1919 // it sees the same reference of the options as is set for the DB. 1920 return index.NewQueryResults(nil, index.QueryResultsOptions{}, indexOpts) 1921 }) 1922 aggregateQueryResultsPool.Init(func() index.AggregateResults { 1923 // NB(r): Need to initialize after setting the index opts so 1924 // it sees the same reference of the options as is set for the DB. 1925 return index.NewAggregateResults(nil, index.AggregateResultsOptions{}, indexOpts) 1926 }) 1927 aggregateQueryValuesPool.Init(func() index.AggregateValues { 1928 // NB(r): Need to initialize after setting the index opts so 1929 // it sees the same reference of the options as is set for the DB. 1930 return index.NewAggregateValues(indexOpts) 1931 }) 1932 1933 return opts.SetIndexOptions(indexOpts) 1934 } 1935 1936 func newAdminClient( 1937 config client.Configuration, 1938 clockOpts clock.Options, 1939 iOpts instrument.Options, 1940 tchannelOpts *tchannel.ChannelOptions, 1941 topologyInitializer topology.Initializer, 1942 runtimeOptsMgr m3dbruntime.OptionsManager, 1943 origin topology.Host, 1944 protoEnabled bool, 1945 schemaRegistry namespace.SchemaRegistry, 1946 kvStore kv.Store, 1947 contextPool xcontext.Pool, 1948 checkedBytesPool pool.CheckedBytesPool, 1949 identifierPool ident.Pool, 1950 logger *zap.Logger, 1951 custom []client.CustomAdminOption, 1952 ) (client.AdminClient, error) { 1953 if config.EnvironmentConfig != nil { 1954 // If the user has provided an override for the dynamic client configuration 1955 // then we need to honor it by not passing our own topology initializer. 1956 topologyInitializer = nil 1957 } 1958 1959 // NB: append custom options coming from run options to existing options. 
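	// For illustration only, one of the `custom` options supplied by the caller
	// (for example via RunOptions.CustomOptions) might look like the sketch
	// below; myInstrumentOpts is a hypothetical variable standing in for
	// whatever the caller wants to apply:
	//
	//	func(opts client.AdminOptions) client.AdminOptions {
	//		return opts.SetInstrumentOptions(myInstrumentOpts).(client.AdminOptions)
	//	}
	//
	// Caller-supplied options are appended after the defaults below, so they
	// take effect last.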
1960 options := []client.CustomAdminOption{ 1961 func(opts client.AdminOptions) client.AdminOptions { 1962 return opts.SetChannelOptions(tchannelOpts).(client.AdminOptions) 1963 }, 1964 func(opts client.AdminOptions) client.AdminOptions { 1965 return opts.SetRuntimeOptionsManager(runtimeOptsMgr).(client.AdminOptions) 1966 }, 1967 func(opts client.AdminOptions) client.AdminOptions { 1968 return opts.SetContextPool(contextPool).(client.AdminOptions) 1969 }, 1970 func(opts client.AdminOptions) client.AdminOptions { 1971 return opts.SetCheckedBytesPool(checkedBytesPool).(client.AdminOptions) 1972 }, 1973 func(opts client.AdminOptions) client.AdminOptions { 1974 return opts.SetIdentifierPool(identifierPool).(client.AdminOptions) 1975 }, 1976 func(opts client.AdminOptions) client.AdminOptions { 1977 return opts.SetOrigin(origin).(client.AdminOptions) 1978 }, 1979 func(opts client.AdminOptions) client.AdminOptions { 1980 if protoEnabled { 1981 return opts.SetEncodingProto(encoding.NewOptions()).(client.AdminOptions) 1982 } 1983 return opts 1984 }, 1985 func(opts client.AdminOptions) client.AdminOptions { 1986 return opts.SetSchemaRegistry(schemaRegistry).(client.AdminOptions) 1987 }, 1988 } 1989 1990 options = append(options, custom...) 1991 m3dbClient, err := config.NewAdminClient( 1992 client.ConfigurationParameters{ 1993 ClockOptions: clockOpts, 1994 InstrumentOptions: iOpts. 1995 SetMetricsScope(iOpts.MetricsScope().SubScope("m3dbclient")), 1996 TopologyInitializer: topologyInitializer, 1997 }, 1998 options..., 1999 ) 2000 if err != nil { 2001 return nil, err 2002 } 2003 2004 // Kick off runtime options manager KV watches. 2005 clientAdminOpts := m3dbClient.Options().(client.AdminOptions) 2006 kvWatchClientConsistencyLevels(kvStore, logger, 2007 clientAdminOpts, runtimeOptsMgr) 2008 return m3dbClient, nil 2009 } 2010 2011 func poolOptions( 2012 policy config.PoolPolicy, 2013 scope tally.Scope, 2014 ) pool.ObjectPoolOptions { 2015 var ( 2016 opts = pool.NewObjectPoolOptions() 2017 size = policy.SizeOrDefault() 2018 refillLowWaterMark = policy.RefillLowWaterMarkOrDefault() 2019 refillHighWaterMark = policy.RefillHighWaterMarkOrDefault() 2020 ) 2021 2022 if size > 0 { 2023 opts = opts.SetSize(int(size)) 2024 if refillLowWaterMark > 0 && 2025 refillHighWaterMark > 0 && 2026 refillHighWaterMark > refillLowWaterMark { 2027 opts = opts. 2028 SetRefillLowWatermark(refillLowWaterMark). 2029 SetRefillHighWatermark(refillHighWaterMark) 2030 } 2031 } 2032 opts = opts.SetDynamic(size.IsDynamic()) 2033 2034 if scope != nil { 2035 opts = opts.SetInstrumentOptions(opts.InstrumentOptions(). 2036 SetMetricsScope(scope)) 2037 } 2038 return opts 2039 } 2040 2041 func capacityPoolOptions( 2042 policy config.CapacityPoolPolicy, 2043 scope tally.Scope, 2044 ) pool.ObjectPoolOptions { 2045 var ( 2046 opts = pool.NewObjectPoolOptions() 2047 size = policy.SizeOrDefault() 2048 refillLowWaterMark = policy.RefillLowWaterMarkOrDefault() 2049 refillHighWaterMark = policy.RefillHighWaterMarkOrDefault() 2050 ) 2051 2052 if size > 0 { 2053 opts = opts.SetSize(int(size)) 2054 if refillLowWaterMark > 0 && 2055 refillHighWaterMark > 0 && 2056 refillHighWaterMark > refillLowWaterMark { 2057 opts = opts.SetRefillLowWatermark(refillLowWaterMark) 2058 opts = opts.SetRefillHighWatermark(refillHighWaterMark) 2059 } 2060 } 2061 opts = opts.SetDynamic(size.IsDynamic()) 2062 2063 if scope != nil { 2064 opts = opts.SetInstrumentOptions(opts.InstrumentOptions(). 
2065 SetMetricsScope(scope)) 2066 } 2067 return opts 2068 } 2069 2070 func maxCapacityPoolOptions( 2071 policy config.MaxCapacityPoolPolicy, 2072 scope tally.Scope, 2073 ) pool.ObjectPoolOptions { 2074 var ( 2075 opts = pool.NewObjectPoolOptions() 2076 size = policy.SizeOrDefault() 2077 refillLowWaterMark = policy.RefillLowWaterMarkOrDefault() 2078 refillHighWaterMark = policy.RefillHighWaterMarkOrDefault() 2079 ) 2080 2081 if size > 0 { 2082 opts = opts.SetSize(int(size)) 2083 if refillLowWaterMark > 0 && 2084 refillHighWaterMark > 0 && 2085 refillHighWaterMark > refillLowWaterMark { 2086 opts = opts.SetRefillLowWatermark(refillLowWaterMark) 2087 opts = opts.SetRefillHighWatermark(refillHighWaterMark) 2088 } 2089 } 2090 opts = opts.SetDynamic(size.IsDynamic()) 2091 2092 if scope != nil { 2093 opts = opts.SetInstrumentOptions(opts.InstrumentOptions(). 2094 SetMetricsScope(scope)) 2095 } 2096 return opts 2097 } 2098 2099 func hostSupportsHugeTLB() (bool, error) { 2100 // Try to determine whether the host supports HugeTLB in the first place 2101 withHugeTLB, err := mmap.Bytes(10, mmap.Options{ 2102 HugeTLB: mmap.HugeTLBOptions{ 2103 Enabled: true, 2104 Threshold: 0, 2105 }, 2106 }) 2107 if err != nil { 2108 return false, fmt.Errorf("could not mmap anonymous region: %v", err) 2109 } 2110 defer mmap.Munmap(withHugeTLB) 2111 2112 if withHugeTLB.Warning == nil { 2113 // If there was no warning, then the host didn't complain about 2114 // use of HugeTLB 2115 return true, nil 2116 } 2117 2118 // If we got a warning, try mmap'ing without HugeTLB 2119 withoutHugeTLB, err := mmap.Bytes(10, mmap.Options{}) 2120 if err != nil { 2121 return false, fmt.Errorf("could not mmap anonymous region: %v", err) 2122 } 2123 defer mmap.Munmap(withoutHugeTLB) 2124 if withoutHugeTLB.Warning == nil { 2125 // The machine doesn't support HugeTLB, proceed without it 2126 return false, nil 2127 } 2128 // The warning was probably caused by something else, proceed using HugeTLB 2129 return true, nil 2130 } 2131 2132 func newTopoMapProvider(t topology.Topology) *topoMapProvider { 2133 return &topoMapProvider{t} 2134 } 2135 2136 type topoMapProvider struct { 2137 t topology.Topology 2138 } 2139 2140 func (t *topoMapProvider) TopologyMap() (topology.Map, error) { 2141 if t.t == nil { 2142 return nil, errors.New("topology map provider has not been set yet") 2143 } 2144 2145 return t.t.Get(), nil 2146 } 2147 2148 // Ensure mmap reporter implements mmap.Reporter 2149 var _ mmap.Reporter = (*mmapReporter)(nil) 2150 2151 type mmapReporter struct { 2152 sync.Mutex 2153 scope tally.Scope 2154 entries map[string]*mmapReporterEntry 2155 } 2156 2157 type mmapReporterEntry struct { 2158 value int64 2159 gauge tally.Gauge 2160 } 2161 2162 func newMmapReporter(scope tally.Scope) *mmapReporter { 2163 return &mmapReporter{ 2164 scope: scope, 2165 entries: make(map[string]*mmapReporterEntry), 2166 } 2167 } 2168 2169 func (r *mmapReporter) Run(ctx context.Context) { 2170 ticker := time.NewTicker(30 * time.Second) 2171 defer ticker.Stop() 2172 2173 for { 2174 select { 2175 case <-ctx.Done(): 2176 return 2177 case <-ticker.C: 2178 r.Lock() 2179 for _, r := range r.entries { 2180 r.gauge.Update(float64(r.value)) 2181 } 2182 r.Unlock() 2183 } 2184 } 2185 } 2186 2187 func (r *mmapReporter) entryKeyAndTags(ctx mmap.Context) (string, map[string]string) { 2188 numTags := 1 2189 if ctx.Metadata != nil { 2190 numTags += len(ctx.Metadata) 2191 } 2192 2193 tags := make(map[string]string, numTags) 2194 tags[mmapReporterTagName] = ctx.Name 2195 if ctx.Metadata
!= nil { 2196 for k, v := range ctx.Metadata { 2197 tags[k] = v 2198 } 2199 } 2200 2201 entryKey := tally.KeyForStringMap(tags) 2202 return entryKey, tags 2203 } 2204 2205 func (r *mmapReporter) ReportMap(ctx mmap.Context) error { 2206 if ctx.Name == "" { 2207 return fmt.Errorf("report mmap map missing context name: %+v", ctx) 2208 } 2209 2210 entryKey, entryTags := r.entryKeyAndTags(ctx) 2211 2212 r.Lock() 2213 defer r.Unlock() 2214 2215 entry, ok := r.entries[entryKey] 2216 if !ok { 2217 entry = &mmapReporterEntry{ 2218 gauge: r.scope.Tagged(entryTags).Gauge(mmapReporterMetricName), 2219 } 2220 r.entries[entryKey] = entry 2221 } 2222 2223 entry.value += ctx.Size 2224 2225 return nil 2226 } 2227 2228 func (r *mmapReporter) ReportUnmap(ctx mmap.Context) error { 2229 if ctx.Name == "" { 2230 return fmt.Errorf("report mmap unmap missing context name: %+v", ctx) 2231 } 2232 2233 entryKey, _ := r.entryKeyAndTags(ctx) 2234 2235 r.Lock() 2236 defer r.Unlock() 2237 2238 entry, ok := r.entries[entryKey] 2239 if !ok { 2240 return fmt.Errorf("report mmap unmap missing entry for context: %+v", ctx) 2241 } 2242 2243 entry.value -= ctx.Size 2244 2245 if entry.value == 0 { 2246 // No more similar mmaps active for this context name, garbage collect 2247 delete(r.entries, entryKey) 2248 } 2249 2250 return nil 2251 }
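
// exampleMmapReporterUsage is an illustrative sketch (with a placeholder map
// name and size) of how the reporter above is meant to be driven: run its
// reporting loop in the background and report map/unmap events against it so
// the gauge named by mmapReporterMetricName tracks currently mapped bytes.
func exampleMmapReporterUsage(scope tally.Scope) error {
	reporter := newMmapReporter(scope)

	// Run flushes each entry's byte count to its gauge every 30 seconds until
	// the context is cancelled.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go reporter.Run(ctx)

	// The context name becomes the mmapReporterTagName tag on the gauge;
	// unmapping the same name and size returns the tracked value to zero and
	// removes the entry.
	mapCtx := mmap.Context{Name: "example-map", Size: 1 << 20}
	if err := reporter.ReportMap(mapCtx); err != nil {
		return err
	}
	return reporter.ReportUnmap(mapCtx)
}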