github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/bootstrap/process.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 

package bootstrap

import (
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/m3db/m3/src/cluster/shard"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/storage/bootstrap/result"
	"github.com/m3db/m3/src/dbnode/topology"
	"github.com/m3db/m3/src/dbnode/tracepoint"
	"github.com/m3db/m3/src/x/clock"
	"github.com/m3db/m3/src/x/context"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/opentracing/opentracing-go/log"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

var (
	// errNoOrigin is returned by newInitialTopologyState when it is given a
	// nil origin host.
	errNoOrigin = errors.New("no origin set for initial topology state")
	// errNoTopologyMap is returned by newInitialTopologyState when it is given
	// a nil topology map.
	errNoTopologyMap = errors.New("no topology map set for initial topology state")
)

// bootstrapProcessProvider is the bootstrapping process provider.
// Each call to Provide builds a fresh bootstrapProcess from the options held
// here plus a topology snapshot taken at that moment.
type bootstrapProcessProvider struct {
	// RWMutex is write-locked by SetBootstrapperProvider and read-locked by
	// BootstrapperProvider and Provide; bootstrapperProvider is the only field
	// the visible methods reassign after construction.
	sync.RWMutex
	// processOpts is validated in NewProcessProvider and supplies the topology
	// map provider and the origin host used by Provide.
	processOpts ProcessOptions
	// resultOpts supplies the logger (via its instrument options) and the
	// clock handed to every provided process.
	resultOpts result.Options
	// fsOpts is passed through to every provided process.
	fsOpts fs.Options
	// log is obtained from resultOpts.InstrumentOptions().Logger().
	log *zap.Logger
	// bootstrapperProvider creates the Bootstrapper used by each provided
	// process.
	bootstrapperProvider BootstrapperProvider
}

// ErrFileSetSnapshotTypeRangeAdvanced is returned by bootstrapProcess.Run when
// the target time range for the last (snapshot-type) pass has advanced while
// earlier ranges were being bootstrapped. It is not a failure as such: the
// bootstrap is meant to be retried so that the time ranges are recalculated.
var ErrFileSetSnapshotTypeRangeAdvanced = errors.New(
	"retrying bootstrap in order to recalculate time ranges (this is OK)")

// NewProcessProvider creates a new bootstrap process provider.
66 func NewProcessProvider( 67 bootstrapperProvider BootstrapperProvider, 68 processOpts ProcessOptions, 69 resultOpts result.Options, 70 fsOpts fs.Options, 71 ) (ProcessProvider, error) { 72 if err := processOpts.Validate(); err != nil { 73 return nil, err 74 } 75 76 return &bootstrapProcessProvider{ 77 processOpts: processOpts, 78 resultOpts: resultOpts, 79 fsOpts: fsOpts, 80 log: resultOpts.InstrumentOptions().Logger(), 81 bootstrapperProvider: bootstrapperProvider, 82 }, nil 83 } 84 85 func (b *bootstrapProcessProvider) SetBootstrapperProvider(bootstrapperProvider BootstrapperProvider) { 86 b.Lock() 87 defer b.Unlock() 88 b.bootstrapperProvider = bootstrapperProvider 89 } 90 91 func (b *bootstrapProcessProvider) BootstrapperProvider() BootstrapperProvider { 92 b.RLock() 93 defer b.RUnlock() 94 return b.bootstrapperProvider 95 } 96 97 func (b *bootstrapProcessProvider) Provide() (Process, error) { 98 b.RLock() 99 defer b.RUnlock() 100 bootstrapper, err := b.bootstrapperProvider.Provide() 101 if err != nil { 102 return nil, err 103 } 104 105 topoMap, err := b.processOpts.TopologyMapProvider().TopologyMap() 106 if err != nil { 107 return nil, err 108 } 109 110 origin := b.processOpts.Origin() 111 initialTopologyState, err := newInitialTopologyState(origin, topoMap) 112 if err != nil { 113 return nil, err 114 } 115 116 return bootstrapProcess{ 117 processOpts: b.processOpts, 118 resultOpts: b.resultOpts, 119 fsOpts: b.fsOpts, 120 nowFn: b.resultOpts.ClockOptions().NowFn(), 121 log: b.log, 122 bootstrapper: bootstrapper, 123 initialTopologyState: initialTopologyState, 124 }, nil 125 } 126 127 func newInitialTopologyState( 128 origin topology.Host, 129 topoMap topology.Map, 130 ) (*topology.StateSnapshot, error) { 131 if origin == nil { 132 return nil, errNoOrigin 133 } 134 if topoMap == nil { 135 return nil, errNoTopologyMap 136 } 137 138 var ( 139 hostShardSets = topoMap.HostShardSets() 140 topologyState = &topology.StateSnapshot{ 141 Origin: origin, 142 
MajorityReplicas: topoMap.MajorityReplicas(), 143 ShardStates: topology.ShardStates{}, 144 } 145 ) 146 147 for _, hostShardSet := range hostShardSets { 148 for _, currShard := range hostShardSet.ShardSet().All() { 149 shardID := topology.ShardID(currShard.ID()) 150 existing, ok := topologyState.ShardStates[shardID] 151 if !ok { 152 existing = map[topology.HostID]topology.HostShardState{} 153 topologyState.ShardStates[shardID] = existing 154 } 155 156 hostID := topology.HostID(hostShardSet.Host().ID()) 157 existing[hostID] = topology.HostShardState{ 158 Host: hostShardSet.Host(), 159 ShardState: currShard.State(), 160 } 161 } 162 } 163 164 return topologyState, nil 165 } 166 167 type bootstrapProcess struct { 168 processOpts ProcessOptions 169 resultOpts result.Options 170 fsOpts fs.Options 171 nowFn clock.NowFn 172 log *zap.Logger 173 bootstrapper Bootstrapper 174 initialTopologyState *topology.StateSnapshot 175 } 176 177 func (b bootstrapProcess) Run( 178 ctx context.Context, 179 at xtime.UnixNano, 180 namespaces []ProcessNamespace, 181 ) (NamespaceResults, error) { 182 namespacesRunFirst := Namespaces{ 183 Namespaces: NewNamespacesMap(NamespacesMapOptions{}), 184 } 185 namespacesRunSecond := Namespaces{ 186 Namespaces: NewNamespacesMap(NamespacesMapOptions{}), 187 } 188 namespaceDetails := make([]NamespaceDetails, 0, len(namespaces)) 189 for _, namespace := range namespaces { 190 var ( 191 nsOpts = namespace.Metadata.Options() 192 dataRanges = b.targetRangesForData(at, nsOpts) 193 indexRanges = b.targetRangesForIndex(at, nsOpts) 194 firstRanges = b.newShardTimeRanges( 195 dataRanges.firstRangeWithPersistTrue.Range, 196 namespace.Shards, 197 ) 198 ) 199 200 namespacesRunFirst.Namespaces.Set(namespace.Metadata.ID(), Namespace{ 201 Metadata: namespace.Metadata, 202 Shards: namespace.Shards, 203 DataAccumulator: namespace.DataAccumulator, 204 Hooks: namespace.Hooks, 205 DataTargetRange: dataRanges.firstRangeWithPersistTrue, 206 IndexTargetRange: 
indexRanges.firstRangeWithPersistTrue, 207 DataRunOptions: NamespaceRunOptions{ 208 ShardTimeRanges: firstRanges.Copy(), 209 TargetShardTimeRanges: firstRanges.Copy(), 210 RunOptions: dataRanges.firstRangeWithPersistTrue.RunOptions, 211 }, 212 IndexRunOptions: NamespaceRunOptions{ 213 ShardTimeRanges: firstRanges.Copy(), 214 TargetShardTimeRanges: firstRanges.Copy(), 215 RunOptions: indexRanges.firstRangeWithPersistTrue.RunOptions, 216 }, 217 }) 218 secondRanges := b.newShardTimeRanges( 219 dataRanges.secondRange.Range, namespace.Shards) 220 namespacesRunSecond.Namespaces.Set(namespace.Metadata.ID(), Namespace{ 221 Metadata: namespace.Metadata, 222 Shards: namespace.Shards, 223 DataAccumulator: namespace.DataAccumulator, 224 Hooks: namespace.Hooks, 225 DataTargetRange: dataRanges.secondRange, 226 IndexTargetRange: indexRanges.secondRange, 227 DataRunOptions: NamespaceRunOptions{ 228 ShardTimeRanges: secondRanges.Copy(), 229 TargetShardTimeRanges: secondRanges.Copy(), 230 RunOptions: dataRanges.secondRange.RunOptions, 231 }, 232 IndexRunOptions: NamespaceRunOptions{ 233 ShardTimeRanges: secondRanges.Copy(), 234 TargetShardTimeRanges: secondRanges.Copy(), 235 RunOptions: indexRanges.secondRange.RunOptions, 236 }, 237 }) 238 namespaceDetails = append(namespaceDetails, NamespaceDetails{ 239 Namespace: namespace.Metadata, 240 Shards: namespace.Shards, 241 }) 242 } 243 cache, err := NewCache(NewCacheOptions(). 244 SetFilesystemOptions(b.fsOpts). 245 SetNamespaceDetails(namespaceDetails). 
246 SetInstrumentOptions(b.fsOpts.InstrumentOptions())) 247 if err != nil { 248 return NamespaceResults{}, err 249 } 250 251 var ( 252 bootstrapResult = NewNamespaceResults(namespacesRunFirst) 253 namespacesToRun = []Namespaces{namespacesRunFirst, namespacesRunSecond} 254 lastRunIndex = len(namespacesToRun) - 1 255 ) 256 for runIndex, namespaces := range namespacesToRun { 257 for _, entry := range namespaces.Namespaces.Iter() { 258 ns := entry.Value() 259 260 // First determine if any shards that we are bootstrapping are 261 // initializing and hence might need peer bootstrapping and if so 262 // make sure the time ranges reflect the time window that should 263 // be bootstrapped from peers (in case time has shifted considerably). 264 if !b.shardsInitializingAny(ns.Shards) { 265 // No shards initializing, don't need to run check to see if 266 // time has shifted. 267 continue 268 } 269 270 // If last run, check if ranges have advanced while bootstrapping previous ranges. 271 // If yes, return an error to force a retry. 272 if runIndex == lastRunIndex { 273 var ( 274 now = xtime.ToUnixNano(b.nowFn()) 275 nsOptions = ns.Metadata.Options() 276 upToDateDataRanges = b.targetRangesForData(now, nsOptions) 277 ) 278 // Only checking data ranges. Since index blocks can only be a multiple of 279 // data block size, the ranges for index could advance only if data ranges 280 // have advanced, too (while opposite is not necessarily true) 281 if !upToDateDataRanges.secondRange.Range.Equal(ns.DataTargetRange.Range) { 282 upToDateIndexRanges := b.targetRangesForIndex(now, nsOptions) 283 fields := b.logFields(ns.Metadata, ns.Shards, 284 upToDateDataRanges.secondRange.Range, 285 upToDateIndexRanges.secondRange.Range) 286 b.log.Error("time ranges of snapshot-type blocks advanced", fields...) 
287 return NamespaceResults{}, ErrFileSetSnapshotTypeRangeAdvanced 288 } 289 } 290 } 291 292 res, err := b.runPass(ctx, namespaces, cache) 293 if err != nil { 294 return NamespaceResults{}, err 295 } 296 297 bootstrapResult = MergeNamespaceResults(bootstrapResult, res) 298 } 299 300 return bootstrapResult, nil 301 } 302 303 func (b bootstrapProcess) shardsInitializingAny( 304 shards []uint32, 305 ) bool { 306 for _, value := range shards { 307 shardID := topology.ShardID(value) 308 hostShardStates, ok := b.initialTopologyState.ShardStates[shardID] 309 if !ok { 310 // This shard was not part of the topology when the bootstrapping 311 // process began. 312 continue 313 } 314 315 originID := topology.HostID(b.initialTopologyState.Origin.ID()) 316 originHostShardState, ok := hostShardStates[originID] 317 if !ok { 318 // This shard was not part of the origin's shard. 319 continue 320 } 321 322 if originHostShardState.ShardState == shard.Initializing { 323 return true 324 } 325 } 326 327 return false 328 } 329 330 func (b bootstrapProcess) runPass( 331 ctx context.Context, 332 namespaces Namespaces, 333 cache Cache, 334 ) (NamespaceResults, error) { 335 ctx, span, sampled := ctx.StartSampledTraceSpan(tracepoint.BootstrapProcessRun) 336 defer span.Finish() 337 338 i := 0 339 for _, entry := range namespaces.Namespaces.Iter() { 340 ns := entry.Value() 341 idx := i 342 i++ 343 344 if sampled { 345 ext := fmt.Sprintf("[%d]", idx) 346 span.LogFields( 347 log.String("namespace"+ext, ns.Metadata.ID().String()), 348 log.Int("shards"+ext, len(ns.Shards)), 349 log.String("dataRange"+ext, ns.DataTargetRange.Range.String()), 350 log.String("indexRange"+ext, ns.IndexTargetRange.Range.String()), 351 ) 352 } 353 354 logFields := b.logFields(ns.Metadata, ns.Shards, 355 ns.DataTargetRange.Range, ns.IndexTargetRange.Range) 356 b.logBootstrapRun(logFields) 357 } 358 359 begin := b.nowFn() 360 res, err := b.bootstrapper.Bootstrap(ctx, namespaces, cache) 361 took := b.nowFn().Sub(begin) 362 
if err != nil { 363 b.log.Error("bootstrap process error", 364 zap.Duration("took", took), 365 zap.Error(err)) 366 return NamespaceResults{}, err 367 } 368 369 for _, entry := range namespaces.Namespaces.Iter() { 370 namespace := entry.Value() 371 nsID := namespace.Metadata.ID() 372 373 result, ok := res.Results.Get(nsID) 374 if !ok { 375 return NamespaceResults{}, 376 fmt.Errorf("result missing for namespace: %v", nsID.String()) 377 } 378 379 logFields := b.logFields(namespace.Metadata, namespace.Shards, 380 namespace.DataTargetRange.Range, namespace.IndexTargetRange.Range) 381 b.logBootstrapResult(result, logFields, took) 382 } 383 384 return res, nil 385 } 386 387 func (b bootstrapProcess) logFields( 388 namespace namespace.Metadata, 389 shards []uint32, 390 dataTimeWindow xtime.Range, 391 indexTimeWindow xtime.Range, 392 ) []zapcore.Field { 393 fields := []zapcore.Field{ 394 zap.String("bootstrapper", b.bootstrapper.String()), 395 zap.Stringer("namespace", namespace.ID()), 396 zap.Int("numShards", len(shards)), 397 zap.Time("dataFrom", dataTimeWindow.Start.ToTime()), 398 zap.Time("dataTo", dataTimeWindow.End.ToTime()), 399 zap.Duration("dataRange", dataTimeWindow.End.Sub(dataTimeWindow.Start)), 400 } 401 if namespace.Options().IndexOptions().Enabled() { 402 fields = append(fields, 403 zap.Time("indexFrom", indexTimeWindow.Start.ToTime()), 404 zap.Time("indexTo", indexTimeWindow.End.ToTime()), 405 zap.Duration("indexRange", indexTimeWindow.End.Sub(indexTimeWindow.Start)), 406 ) 407 } 408 return fields 409 } 410 411 func (b bootstrapProcess) newShardTimeRanges( 412 window xtime.Range, 413 shards []uint32, 414 ) result.ShardTimeRanges { 415 shardsTimeRanges := result.NewShardTimeRanges() 416 ranges := xtime.NewRanges(window) 417 for _, s := range shards { 418 shardsTimeRanges.Set(s, ranges) 419 } 420 return shardsTimeRanges 421 } 422 423 func (b bootstrapProcess) logBootstrapRun( 424 logFields []zapcore.Field, 425 ) { 426 b.log.Info("bootstrap range starting", 
logFields...) 427 } 428 429 func (b bootstrapProcess) logBootstrapResult( 430 result NamespaceResult, 431 logFields []zapcore.Field, 432 took time.Duration, 433 ) { 434 logFields = append(logFields, 435 zap.Duration("took", took)) 436 if result.IndexResult != nil { 437 logFields = append(logFields, 438 zap.Int("numIndexBlocks", len(result.IndexResult.IndexResults()))) 439 } 440 441 b.log.Info("bootstrap range completed", logFields...) 442 } 443 444 func (b bootstrapProcess) targetRangesForData( 445 at xtime.UnixNano, 446 nsOpts namespace.Options, 447 ) targetRangesResult { 448 ropts := nsOpts.RetentionOptions() 449 return b.targetRanges(at, targetRangesOptions{ 450 retentionPeriod: ropts.RetentionPeriod(), 451 futureRetentionPeriod: ropts.FutureRetentionPeriod(), 452 blockSize: ropts.BlockSize(), 453 bufferPast: ropts.BufferPast(), 454 bufferFuture: ropts.BufferFuture(), 455 snapshotEnabled: nsOpts.SnapshotEnabled(), 456 }) 457 } 458 459 func (b bootstrapProcess) targetRangesForIndex( 460 at xtime.UnixNano, 461 nsOpts namespace.Options, 462 ) targetRangesResult { 463 ropts := nsOpts.RetentionOptions() 464 return b.targetRanges(at, targetRangesOptions{ 465 retentionPeriod: ropts.RetentionPeriod(), 466 futureRetentionPeriod: ropts.FutureRetentionPeriod(), 467 blockSize: nsOpts.IndexOptions().BlockSize(), 468 bufferPast: ropts.BufferPast(), 469 bufferFuture: ropts.BufferFuture(), 470 snapshotEnabled: nsOpts.SnapshotEnabled(), 471 }) 472 } 473 474 type targetRangesOptions struct { 475 retentionPeriod time.Duration 476 futureRetentionPeriod time.Duration 477 blockSize time.Duration 478 bufferPast time.Duration 479 bufferFuture time.Duration 480 snapshotEnabled bool 481 } 482 483 type targetRangesResult struct { 484 firstRangeWithPersistTrue TargetRange 485 secondRange TargetRange 486 } 487 488 func (b bootstrapProcess) targetRanges( 489 at xtime.UnixNano, 490 opts targetRangesOptions, 491 ) targetRangesResult { 492 start := at.Add(-opts.retentionPeriod). 
493 Truncate(opts.blockSize) 494 midPoint := at. 495 Add(-opts.blockSize). 496 Add(-opts.bufferPast). 497 Truncate(opts.blockSize). 498 // NB(r): Since "end" is exclusive we need to add a 499 // an extra block size when specifying the end time. 500 Add(opts.blockSize) 501 cutover := at.Add(opts.bufferFuture). 502 Truncate(opts.blockSize). 503 Add(opts.blockSize) 504 505 secondRangeFilesetType := persist.FileSetSnapshotType 506 if !opts.snapshotEnabled { 507 // NB: If snapshots are disabled for a namespace, we want to use flush type. 508 secondRangeFilesetType = persist.FileSetFlushType 509 } 510 511 // NB(r): We want the large initial time range bootstrapped to 512 // bootstrap with persistence so we don't keep the full raw 513 // data in process until we finish bootstrapping which could 514 // cause the process to OOM. 515 return targetRangesResult{ 516 firstRangeWithPersistTrue: TargetRange{ 517 Range: xtime.Range{Start: start, End: midPoint}, 518 RunOptions: b.newRunOptions().SetPersistConfig(PersistConfig{ 519 Enabled: true, 520 // These blocks are no longer active, so we want to flush them 521 // to disk as we receive them so that we don't hold too much 522 // data in memory at once. 523 FileSetType: persist.FileSetFlushType, 524 }), 525 }, 526 secondRange: TargetRange{ 527 Range: xtime.Range{Start: midPoint, End: cutover}, 528 RunOptions: b.newRunOptions().SetPersistConfig(PersistConfig{ 529 Enabled: true, 530 // These blocks are still active so we'll have to keep them 531 // in memory, but we want to snapshot them as we receive them 532 // so that once bootstrapping completes we can still recover 533 // from just the commit log bootstrapper. 534 FileSetType: secondRangeFilesetType, 535 }), 536 }, 537 } 538 } 539 540 func (b bootstrapProcess) newRunOptions() RunOptions { 541 return NewRunOptions(). 542 SetCacheSeriesMetadata( 543 b.processOpts.CacheSeriesMetadata(), 544 ). 
545 SetInitialTopologyState(b.initialTopologyState) 546 } 547 548 // NewNamespaces returns a new set of bootstrappable namespaces. 549 func NewNamespaces( 550 namespaces []ProcessNamespace, 551 ) Namespaces { 552 namespacesMap := NewNamespacesMap(NamespacesMapOptions{}) 553 for _, ns := range namespaces { 554 namespacesMap.Set(ns.Metadata.ID(), Namespace{ 555 Metadata: ns.Metadata, 556 Shards: ns.Shards, 557 DataAccumulator: ns.DataAccumulator, 558 }) 559 } 560 return Namespaces{ 561 Namespaces: namespacesMap, 562 } 563 } 564 565 // NewNamespaceResults creates a 566 // namespace results map with an entry for each 567 // namespace spoecified by a namespaces map. 568 func NewNamespaceResults( 569 namespaces Namespaces, 570 ) NamespaceResults { 571 resultsMap := NewNamespaceResultsMap(NamespaceResultsMapOptions{}) 572 for _, entry := range namespaces.Namespaces.Iter() { 573 key := entry.Key() 574 value := entry.Value() 575 resultsMap.Set(key, NamespaceResult{ 576 Metadata: value.Metadata, 577 Shards: value.Shards, 578 DataResult: result.NewDataBootstrapResult(), 579 IndexResult: result.NewIndexBootstrapResult(), 580 }) 581 } 582 return NamespaceResults{ 583 Results: resultsMap, 584 } 585 } 586 587 // MergeNamespaceResults merges two namespace results, this will mutate 588 // both a and b and return a merged copy of them reusing one of the results. 589 func MergeNamespaceResults(a, b NamespaceResults) NamespaceResults { 590 for _, entry := range a.Results.Iter() { 591 id := entry.Key() 592 elem := entry.Value() 593 other, ok := b.Results.Get(id) 594 if !ok { 595 continue 596 } 597 elem.DataResult = result.MergedDataBootstrapResult(elem.DataResult, 598 other.DataResult) 599 elem.IndexResult = result.MergedIndexBootstrapResult(elem.IndexResult, 600 other.IndexResult) 601 602 // Save back the merged results. 603 a.Results.Set(id, elem) 604 605 // Remove from b, then can directly add to a all non-merged results. 
606 b.Results.Delete(id) 607 } 608 // All overlapping between a and b have been merged, add rest to a. 609 for _, entry := range b.Results.Iter() { 610 a.Results.Set(entry.Key(), entry.Value()) 611 } 612 return a 613 }