github.com/m3db/m3@v1.5.0/src/dbnode/storage/database.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package storage 22 23 import ( 24 "bytes" 25 "errors" 26 "fmt" 27 "sync" 28 "sync/atomic" 29 "time" 30 31 opentracinglog "github.com/opentracing/opentracing-go/log" 32 "github.com/uber-go/tally" 33 "go.uber.org/zap" 34 35 "github.com/m3db/m3/src/cluster/shard" 36 "github.com/m3db/m3/src/dbnode/client" 37 "github.com/m3db/m3/src/dbnode/generated/proto/annotation" 38 "github.com/m3db/m3/src/dbnode/namespace" 39 "github.com/m3db/m3/src/dbnode/persist/fs/commitlog" 40 "github.com/m3db/m3/src/dbnode/sharding" 41 "github.com/m3db/m3/src/dbnode/storage/block" 42 dberrors "github.com/m3db/m3/src/dbnode/storage/errors" 43 "github.com/m3db/m3/src/dbnode/storage/index" 44 "github.com/m3db/m3/src/dbnode/storage/index/convert" 45 "github.com/m3db/m3/src/dbnode/storage/limits" 46 "github.com/m3db/m3/src/dbnode/storage/series" 47 "github.com/m3db/m3/src/dbnode/tracepoint" 48 "github.com/m3db/m3/src/dbnode/ts" 49 "github.com/m3db/m3/src/dbnode/ts/writes" 50 "github.com/m3db/m3/src/x/clock" 51 "github.com/m3db/m3/src/x/context" 52 xerrors "github.com/m3db/m3/src/x/errors" 53 "github.com/m3db/m3/src/x/ident" 54 "github.com/m3db/m3/src/x/instrument" 55 xopentracing "github.com/m3db/m3/src/x/opentracing" 56 xtime "github.com/m3db/m3/src/x/time" 57 ) 58 59 const ( 60 // The database is considered overloaded if the queue size is 90% or more 61 // of the maximum capacity. We set this below 1.0 because checking the queue 62 // length is racy, so we may burst past this value anyway, and the buffer 63 // gives us breathing room to recover. 64 commitLogQueueCapacityOverloadedFactor = 0.9 65 ) 66 67 var ( 68 // errDatabaseAlreadyOpen raised when trying to open a database that is already open. 69 errDatabaseAlreadyOpen = errors.New("database is already open") 70 71 // errDatabaseNotOpen raised when trying to close a database that is not open. 72 errDatabaseNotOpen = errors.New("database is not open") 73 74 // errDatabaseAlreadyClosed raised when trying to close a database that is already closed. 75 errDatabaseAlreadyClosed = errors.New("database is already closed") 76 77 // errDatabaseIsClosed raised when trying to perform an action that requires an open database.
78 errDatabaseIsClosed = errors.New("database is closed") 79 80 // errWriterDoesNotImplementWriteBatch is raised when the provided ts.BatchWriter does not implement 81 // ts.WriteBatch. 82 errWriterDoesNotImplementWriteBatch = errors.New("provided writer does not implement ts.WriteBatch") 83 aggregationsInProgress int32 84 ) 85 86 type databaseState int 87 88 const ( 89 databaseNotOpen databaseState = iota 90 databaseOpen 91 databaseClosed 92 ) 93 94 // increasingIndex provides a monotonically increasing index for new series 95 type increasingIndex interface { 96 nextIndex() uint64 97 } 98 99 type db struct { 100 sync.RWMutex 101 bootstrapMutex sync.Mutex 102 opts Options 103 nowFn clock.NowFn 104 105 nsWatch namespace.NamespaceWatch 106 namespaces *databaseNamespacesMap 107 runtimeOptionsRegistry namespace.RuntimeOptionsManagerRegistry 108 109 commitLog commitlog.CommitLog 110 111 state databaseState 112 mediator databaseMediator 113 repairer databaseRepairer 114 115 created uint64 116 bootstraps int 117 118 shardSet sharding.ShardSet 119 lastReceivedNewShards time.Time 120 121 scope tally.Scope 122 metrics databaseMetrics 123 log *zap.Logger 124 125 writeBatchPool *writes.WriteBatchPool 126 127 queryLimits limits.QueryLimits 128 } 129 130 type databaseMetrics struct { 131 unknownNamespaceRead tally.Counter 132 unknownNamespaceWrite tally.Counter 133 unknownNamespaceWriteTagged tally.Counter 134 unknownNamespaceBatchWriter tally.Counter 135 unknownNamespaceWriteBatch tally.Counter 136 unknownNamespaceWriteTaggedBatch tally.Counter 137 unknownNamespaceFetchBlocks tally.Counter 138 unknownNamespaceFetchBlocksMetadata tally.Counter 139 unknownNamespaceQueryIDs tally.Counter 140 errQueryIDsIndexDisabled tally.Counter 141 errWriteTaggedIndexDisabled tally.Counter 142 pendingNamespaceChange tally.Gauge 143 } 144 145 func newDatabaseMetrics(scope tally.Scope) databaseMetrics { 146 unknownNamespaceScope := scope.SubScope("unknown-namespace") 147 indexDisabledScope := scope.SubScope("index-disabled") 148 return databaseMetrics{ 149 unknownNamespaceRead: unknownNamespaceScope.Counter("read"), 150 unknownNamespaceWrite: unknownNamespaceScope.Counter("write"), 151 unknownNamespaceWriteTagged: unknownNamespaceScope.Counter("write-tagged"), 152 unknownNamespaceBatchWriter: unknownNamespaceScope.Counter("batch-writer"), 153 unknownNamespaceWriteBatch: unknownNamespaceScope.Counter("write-batch"), 154 unknownNamespaceWriteTaggedBatch: unknownNamespaceScope.Counter("write-tagged-batch"), 155 unknownNamespaceFetchBlocks: unknownNamespaceScope.Counter("fetch-blocks"), 156 unknownNamespaceFetchBlocksMetadata: unknownNamespaceScope.Counter("fetch-blocks-metadata"), 157 unknownNamespaceQueryIDs: unknownNamespaceScope.Counter("query-ids"), 158 errQueryIDsIndexDisabled: indexDisabledScope.Counter("err-query-ids"), 159 errWriteTaggedIndexDisabled: indexDisabledScope.Counter("err-write-tagged"), 160 pendingNamespaceChange: scope.Gauge("pending-namespace-change"), 161 } 162 } 163 164 // NewDatabase creates a new time series database. 
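// The returned database is not yet open: callers must call Open before it can serve traffic, and an
// initial bootstrap is triggered externally via Bootstrap (see the clustered database note on
// enqueueBootstrapAsyncWithLock below). Shard assignments can be updated later via AssignShardSet.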
165 func NewDatabase( 166 shardSet sharding.ShardSet, 167 opts Options, 168 ) (Database, error) { 169 if err := opts.Validate(); err != nil { 170 return nil, fmt.Errorf("invalid options: %v", err) 171 } 172 173 commitLog, err := commitlog.NewCommitLog(opts.CommitLogOptions()) 174 if err != nil { 175 return nil, err 176 } 177 if err := commitLog.Open(); err != nil { 178 return nil, err 179 } 180 181 var ( 182 iopts = opts.InstrumentOptions() 183 scope = iopts.MetricsScope().SubScope("database") 184 logger = iopts.Logger() 185 nowFn = opts.ClockOptions().NowFn() 186 ) 187 188 d := &db{ 189 opts: opts, 190 nowFn: nowFn, 191 shardSet: shardSet, 192 lastReceivedNewShards: nowFn(), 193 namespaces: newDatabaseNamespacesMap(databaseNamespacesMapOptions{}), 194 runtimeOptionsRegistry: opts.NamespaceRuntimeOptionsManagerRegistry(), 195 commitLog: commitLog, 196 scope: scope, 197 metrics: newDatabaseMetrics(scope), 198 log: logger, 199 writeBatchPool: opts.WriteBatchPool(), 200 queryLimits: opts.IndexOptions().QueryLimits(), 201 } 202 203 databaseIOpts := iopts.SetMetricsScope(scope) 204 205 // initialize namespaces 206 nsInit := opts.NamespaceInitializer() 207 208 logger.Info("creating namespaces watch") 209 nsReg, err := nsInit.Init() 210 if err != nil { 211 return nil, err 212 } 213 214 // get a namespace watch 215 watch, err := nsReg.Watch() 216 if err != nil { 217 return nil, err 218 } 219 220 // Wait until the first namespaces value is received and set the value. 221 // It's important that this happens before the mediator is started to prevent 222 // a race condition where the namespaces haven't been initialized yet and 223 // OwnedNamespaces() returns an empty slice which makes the cleanup logic 224 // in the background Tick think it can clean up files that it shouldn't. 225 logger.Info("resolving namespaces with namespace watch") 226 <-watch.C() 227 dbUpdater := func(namespaces namespace.Map) error { 228 return d.UpdateOwnedNamespaces(namespaces) 229 } 230 d.nsWatch = namespace.NewNamespaceWatch(dbUpdater, watch, databaseIOpts) 231 nsMap := watch.Get() 232 if err := d.UpdateOwnedNamespaces(nsMap); err != nil { 233 // Log the error and proceed in case some namespace is misconfigured, e.g. missing schema. 234 // A misconfigured namespace won't be initialized, but it should not prevent the database 235 // or other namespaces from being initialized. 236 d.log.Error("failed to update owned namespaces", 237 zap.Error(err)) 238 } 239 240 d.mediator, err = newMediator( 241 d, commitLog, opts.SetInstrumentOptions(databaseIOpts)) 242 if err != nil { 243 return nil, err 244 } 245 246 d.repairer = newNoopDatabaseRepairer() 247 if opts.RepairEnabled() { 248 d.repairer, err = newDatabaseRepairer(d, opts) 249 if err != nil { 250 return nil, err 251 } 252 err = d.mediator.RegisterBackgroundProcess(d.repairer) 253 if err != nil { 254 return nil, err 255 } 256 } 257 258 for _, fn := range opts.BackgroundProcessFns() { 259 process, err := fn(d, opts) 260 if err != nil { 261 return nil, err 262 } 263 err = d.mediator.RegisterBackgroundProcess(process) 264 if err != nil { 265 return nil, err 266 } 267 } 268 269 return d, nil 270 } 271 272 func (d *db) UpdateOwnedNamespaces(newNamespaces namespace.Map) error { 273 if newNamespaces == nil { 274 return nil 275 } 276 // NB: Use bootstrapMutex to protect from competing calls. 277 asyncUnlock := false 278 d.bootstrapMutex.Lock() 279 defer func() { 280 if !asyncUnlock { 281 d.bootstrapMutex.Unlock() 282 } 283 }() 284 285 // Always update schema registry before owned namespaces.
286 if err := namespace.UpdateSchemaRegistry(newNamespaces, d.opts.SchemaRegistry(), d.log); err != nil { 287 // Log schema update error and proceed. 288 // In a multi-namespace database, a schema update failure for one namespace should be isolated. 289 d.log.Error("failed to update schema registry", zap.Error(err)) 290 } 291 292 // Always update the runtime options if they were set so that correct 293 // runtime options are set in the runtime options registry before namespaces 294 // are actually created. 295 for _, namespaceMetadata := range newNamespaces.Metadatas() { 296 id := namespaceMetadata.ID().String() 297 runtimeOptsMgr := d.runtimeOptionsRegistry.RuntimeOptionsManager(id) 298 currRuntimeOpts := runtimeOptsMgr.Get() 299 setRuntimeOpts := namespaceMetadata.Options().RuntimeOptions() 300 if !currRuntimeOpts.Equal(setRuntimeOpts) { 301 runtimeOptsMgr.Update(setRuntimeOpts) 302 } 303 } 304 305 // NB: Can hold lock since all long-running tasks are enqueued to run 306 // async while holding the lock. 307 d.Lock() 308 defer d.Unlock() 309 310 removes, adds, updates := d.namespaceDeltaWithLock(newNamespaces) 311 if err := d.logNamespaceUpdate(removes, adds, updates); err != nil { 312 d.log.Error("unable to log namespace updates", zap.Error(err)) 313 return err 314 } 315 316 // log that updates and removals are skipped 317 if len(removes) > 0 || len(updates) > 0 { 318 d.metrics.pendingNamespaceChange.Update(1) 319 d.log.Warn("skipping namespace removals and updates " + 320 "(except schema updates and runtime options), " + 321 "restart the process if you want changes to take effect") 322 } 323 324 if len(adds) > 0 { 325 if d.bootstraps == 0 || !d.mediatorIsOpenWithLock() { 326 // If there have been no bootstraps yet, or the mediator is not open, we can just 327 // add the namespaces and optionally enqueue a bootstrap (which is 328 // async), since no file operations can be in flight while 329 // there has been no bootstrap and/or the mediator is not open. 330 if err := d.addNamespacesWithLock(adds); err != nil { 331 d.log.Error("unable to add namespaces", zap.Error(err)) 332 return err 333 } 334 335 if d.bootstraps > 0 { 336 // If already bootstrapped before, enqueue another 337 // bootstrap (asynchronously, ok to trigger holding lock). 338 asyncUnlock = true 339 d.enqueueBootstrapAsync(d.bootstrapMutex.Unlock) 340 } 341 342 return nil 343 } 344 345 // NB: the mediator is open, so we need to disable file ops and wait for all the background processes to complete 346 // so that we can update namespaces safely. Otherwise, there is a high chance of hitting an 347 // invariant violation panic because cold/warm flush will receive new namespaces 348 // in the middle of their operations. 349 d.Unlock() // Don't hold the lock while we wait for file ops. 350 d.disableFileOpsAndWait() 351 d.Lock() // Reacquire lock after waiting. 352 353 // Add any namespaces marked for addition. 354 if err := d.addNamespacesWithLock(adds); err != nil { 355 d.log.Error("unable to add namespaces", zap.Error(err)) 356 d.enableFileOps() 357 return err 358 } 359 360 // Enqueue bootstrap and enable file ops when bootstrap is completed.
361 asyncUnlock = true 362 d.enqueueBootstrapAsyncWithLock( 363 func() { 364 d.enableFileOps() 365 d.bootstrapMutex.Unlock() 366 }) 367 } 368 return nil 369 } 370 371 func (d *db) mediatorIsOpenWithLock() bool { 372 if d.mediator == nil { 373 return false 374 } 375 return d.mediator.IsOpen() 376 } 377 378 func (d *db) disableFileOpsAndWait() { 379 if mediator := d.mediator; mediator != nil && mediator.IsOpen() { 380 d.log.Info("waiting for file ops to be disabled") 381 mediator.DisableFileOpsAndWait() 382 } 383 } 384 385 func (d *db) enableFileOps() { 386 if mediator := d.mediator; mediator != nil && mediator.IsOpen() { 387 d.log.Info("enabling file ops") 388 mediator.EnableFileOps() 389 } 390 } 391 392 func (d *db) namespaceDeltaWithLock(newNamespaces namespace.Map) ([]ident.ID, []namespace.Metadata, []namespace.Metadata) { 393 var ( 394 existing = d.namespaces 395 removes []ident.ID 396 adds []namespace.Metadata 397 updates []namespace.Metadata 398 ) 399 400 // check if existing namespaces exist in newNamespaces 401 for _, entry := range existing.Iter() { 402 ns := entry.Value() 403 newMd, err := newNamespaces.Get(ns.ID()) 404 // if a namespace doesn't exist in newNamespaces, mark for removal 405 if err != nil { 406 removes = append(removes, ns.ID()) 407 continue 408 } 409 410 // if namespace exists in newNamespaces, check if options are the same 411 optionsSame := newMd.Options().Equal(ns.Options()) 412 413 // if options are the same, we don't need to do anything 414 if optionsSame { 415 continue 416 } 417 418 // if options are not the same, we mark for updates 419 updates = append(updates, newMd) 420 } 421 422 // check for any namespaces that need to be added 423 for _, ns := range newNamespaces.Metadatas() { 424 _, exists := d.namespaces.Get(ns.ID()) 425 if !exists { 426 adds = append(adds, ns) 427 } 428 } 429 430 return removes, adds, updates 431 } 432 433 func (d *db) logNamespaceUpdate(removes []ident.ID, adds, updates []namespace.Metadata) error { 434 removalString, err := tsIDs(removes).String() 435 if err != nil { 436 return fmt.Errorf("unable to format removal, err = %v", err) 437 } 438 439 addString, err := metadatas(adds).String() 440 if err != nil { 441 return fmt.Errorf("unable to format adds, err = %v", err) 442 } 443 444 updateString, err := metadatas(updates).String() 445 if err != nil { 446 return fmt.Errorf("unable to format updates, err = %v", err) 447 } 448 449 // log scheduled operation 450 d.log.Info("updating database namespaces", 451 zap.String("adds", addString), 452 zap.String("updates", updateString), 453 zap.String("removals", removalString), 454 ) 455 456 // NB(prateek): as noted in `UpdateOwnedNamespaces()` above, the current implementation 457 // does not apply updates, and removals until the m3dbnode process is restarted. 
458 459 return nil 460 } 461 462 func (d *db) addNamespacesWithLock(namespaces []namespace.Metadata) error { 463 createdNamespaces := make([]databaseNamespace, 0, len(namespaces)) 464 465 for _, n := range namespaces { 466 // ensure namespace doesn't exist 467 _, ok := d.namespaces.Get(n.ID()) 468 if ok { // should never happen 469 return fmt.Errorf("existing namespace marked for addition: %v", n.ID().String()) 470 } 471 472 // create and add to the database 473 newNs, err := d.newDatabaseNamespaceWithLock(n) 474 if err != nil { 475 return err 476 } 477 d.namespaces.Set(n.ID(), newNs) 478 createdNamespaces = append(createdNamespaces, newNs) 479 } 480 481 hooks := d.Options().NamespaceHooks() 482 for _, ns := range createdNamespaces { 483 err := hooks.OnCreatedNamespace(ns, d.getNamespaceWithLock) 484 if err != nil { 485 return err 486 } 487 } 488 489 return nil 490 } 491 492 func (d *db) getNamespaceWithLock(id ident.ID) (Namespace, bool) { 493 return d.namespaces.Get(id) 494 } 495 496 func (d *db) newDatabaseNamespaceWithLock( 497 md namespace.Metadata, 498 ) (databaseNamespace, error) { 499 var ( 500 retriever block.DatabaseBlockRetriever 501 err error 502 ) 503 if mgr := d.opts.DatabaseBlockRetrieverManager(); mgr != nil { 504 retriever, err = mgr.Retriever(md, d.shardSet) 505 if err != nil { 506 return nil, err 507 } 508 } 509 nsID := md.ID().String() 510 runtimeOptsMgr := d.runtimeOptionsRegistry.RuntimeOptionsManager(nsID) 511 return newDatabaseNamespace(md, runtimeOptsMgr, 512 d.shardSet, retriever, d, d.commitLog, d.opts) 513 } 514 515 func (d *db) Options() Options { 516 // Options are immutable safe to pass the current reference 517 return d.opts 518 } 519 520 func (d *db) AssignShardSet(shardSet sharding.ShardSet) { 521 // NB: Use bootstrapMutex to protect from competing calls. 522 d.bootstrapMutex.Lock() 523 asyncUnlock := false 524 defer func() { 525 if !asyncUnlock { 526 // Unlock only if asyncUnlock is not set. Otherwise, we will unlock asynchronously. 527 d.bootstrapMutex.Unlock() 528 } 529 }() 530 // NB: Can hold lock since all long running tasks are enqueued to run 531 // async while holding the lock. 532 d.Lock() 533 defer d.Unlock() 534 535 added, removed, updated := d.shardsDeltaWithLock(shardSet) 536 537 if !added && !removed && !updated { 538 d.log.Info("received identical shardSet, skipping shard assignment") 539 return 540 } 541 542 if added { 543 d.lastReceivedNewShards = d.nowFn() 544 } 545 546 if d.bootstraps == 0 || !d.mediatorIsOpenWithLock() { 547 // If not bootstrapped before or mediator is not open then can just 548 // immediately assign shards. 549 d.assignShardsWithLock(shardSet) 550 if d.bootstraps > 0 { 551 // If already bootstrapped before, enqueue another 552 // bootstrap (asynchronously, ok to trigger holding lock). 553 asyncUnlock = true 554 d.enqueueBootstrapAsync(d.bootstrapMutex.Unlock) 555 } 556 return 557 } 558 559 if added { 560 // Wait outside of holding lock to disable file operations. 
561 d.Unlock() 562 d.disableFileOpsAndWait() 563 d.Lock() 564 } 565 566 d.assignShardsWithLock(shardSet) 567 568 if added { 569 asyncUnlock = true 570 d.enqueueBootstrapAsyncWithLock(func() { 571 d.enableFileOps() 572 d.bootstrapMutex.Unlock() 573 }) 574 } 575 } 576 577 func (d *db) assignShardsWithLock(shardSet sharding.ShardSet) { 578 d.log.Info("assigning shards", zap.Uint32s("shards", shardSet.AllIDs())) 579 d.shardSet = shardSet 580 for _, elem := range d.namespaces.Iter() { 581 ns := elem.Value() 582 ns.AssignShardSet(shardSet) 583 } 584 } 585 586 func (d *db) shardsDeltaWithLock(incoming sharding.ShardSet) (bool, bool, bool) { 587 var ( 588 existing = d.shardSet 589 existingShards = existing.All() 590 incomingShards = incoming.All() 591 existingSet = make(map[uint32]shard.Shard, len(existingShards)) 592 incomingSet = make(map[uint32]shard.Shard, len(incomingShards)) 593 added bool 594 removed bool 595 updated bool 596 ) 597 598 for _, shard := range existingShards { 599 existingSet[shard.ID()] = shard 600 } 601 602 for _, shard := range incomingShards { 603 incomingSet[shard.ID()] = shard 604 existingShard, ok := existingSet[shard.ID()] 605 if !ok { 606 added = true 607 } else if !existingShard.Equals(shard) { 608 updated = true 609 } 610 } 611 612 for shardID := range existingSet { 613 _, ok := incomingSet[shardID] 614 if !ok { 615 removed = true 616 break 617 } 618 } 619 620 return added, removed, updated 621 } 622 623 func (d *db) hasReceivedNewShardsWithLock(incoming sharding.ShardSet) bool { 624 var ( 625 existing = d.shardSet 626 existingSet = make(map[uint32]struct{}, len(existing.AllIDs())) 627 ) 628 629 for _, shard := range existing.AllIDs() { 630 existingSet[shard] = struct{}{} 631 } 632 633 receivedNewShards := false 634 for _, shard := range incoming.AllIDs() { 635 _, ok := existingSet[shard] 636 if !ok { 637 receivedNewShards = true 638 break 639 } 640 } 641 642 return receivedNewShards 643 } 644 645 func (d *db) ShardSet() sharding.ShardSet { 646 d.RLock() 647 defer d.RUnlock() 648 shardSet := d.shardSet 649 return shardSet 650 } 651 652 func (d *db) enqueueBootstrapAsync(onCompleteFn func()) { 653 d.log.Info("enqueuing bootstrap") 654 d.mediator.BootstrapEnqueue(BootstrapEnqueueOptions{ 655 OnCompleteFn: func(_ BootstrapResult) { 656 onCompleteFn() 657 }, 658 }) 659 } 660 661 func (d *db) enqueueBootstrapAsyncWithLock(onCompleteFn func()) { 662 // Only perform a bootstrap if at least one bootstrap has already occurred. This enables 663 // the ability to open the clustered database and assign shardsets to the non-clustered 664 // database when it receives an initial topology (as well as topology changes) without 665 // triggering a bootstrap until an external call initiates a bootstrap with an initial 666 // call to Bootstrap(). After that initial bootstrap, the clustered database will keep 667 // the non-clustered database bootstrapped by assigning it shardsets which will trigger new 668 // bootstraps since d.bootstraps > 0 will be true. 
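// If no bootstrap has been initiated yet, the completion callback is instead invoked inline below,
// so callers still release bootstrapMutex (and re-enable file ops where applicable) without waiting
// on a bootstrap that will never be enqueued.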
669 if d.bootstraps > 0 { 670 d.log.Info("enqueuing bootstrap with onComplete function") 671 d.mediator.BootstrapEnqueue(BootstrapEnqueueOptions{ 672 OnCompleteFn: func(_ BootstrapResult) { 673 onCompleteFn() 674 }, 675 }) 676 return 677 } 678 679 onCompleteFn() 680 } 681 682 func (d *db) Namespace(id ident.ID) (Namespace, bool) { 683 d.RLock() 684 defer d.RUnlock() 685 return d.namespaces.Get(id) 686 } 687 688 func (d *db) Namespaces() []Namespace { 689 d.RLock() 690 defer d.RUnlock() 691 namespaces := make([]Namespace, 0, d.namespaces.Len()) 692 for _, elem := range d.namespaces.Iter() { 693 namespaces = append(namespaces, elem.Value()) 694 } 695 return namespaces 696 } 697 698 func (d *db) Open() error { 699 d.Lock() 700 defer d.Unlock() 701 // check if db has already been opened 702 if d.state != databaseNotOpen { 703 return errDatabaseAlreadyOpen 704 } 705 d.state = databaseOpen 706 707 // start namespace watch 708 if err := d.nsWatch.Start(); err != nil { 709 return err 710 } 711 712 // Start the wired list 713 if wiredList := d.opts.DatabaseBlockOptions().WiredList(); wiredList != nil { 714 err := wiredList.Start() 715 if err != nil { 716 return err 717 } 718 } 719 720 return d.mediator.Open() 721 } 722 723 func (d *db) terminateWithLock() error { 724 // ensure database is open 725 if d.state == databaseNotOpen { 726 return errDatabaseNotOpen 727 } 728 if d.state == databaseClosed { 729 return errDatabaseAlreadyClosed 730 } 731 d.state = databaseClosed 732 733 // close the mediator 734 if err := d.mediator.Close(); err != nil { 735 return err 736 } 737 738 // stop listening for namespace changes 739 if err := d.nsWatch.Close(); err != nil { 740 return err 741 } 742 743 // Stop the wired list 744 if wiredList := d.opts.DatabaseBlockOptions().WiredList(); wiredList != nil { 745 err := wiredList.Stop() 746 if err != nil { 747 return err 748 } 749 } 750 751 // NB(prateek): Terminate is meant to return quickly, so we rely upon 752 // the gc to clean up any resources held by namespaces, and just set 753 // our reference to the namespaces to nil. 754 d.namespaces.Reallocate() 755 756 // Finally close the commit log 757 return d.commitLog.Close() 758 } 759 760 func (d *db) Terminate() error { 761 // NB(bodu): Disable file ops waits for current fs processes to 762 // finish before disabling. 763 d.mediator.DisableFileOpsAndWait() 764 765 d.Lock() 766 defer d.Unlock() 767 768 return d.terminateWithLock() 769 } 770 771 func (d *db) Close() error { 772 // NB(bodu): Disable file ops waits for current fs processes to 773 // finish before disabling. 
774 d.mediator.DisableFileOpsAndWait() 775 776 d.Lock() 777 defer d.Unlock() 778 779 // get a reference to all owned namespaces 780 namespaces := d.ownedNamespacesWithLock() 781 782 // release any database level resources 783 if err := d.terminateWithLock(); err != nil { 784 return err 785 } 786 787 var multiErr xerrors.MultiError 788 for _, ns := range namespaces { 789 multiErr = multiErr.Add(ns.Close()) 790 } 791 792 return multiErr.FinalError() 793 } 794 795 func (d *db) Write( 796 ctx context.Context, 797 namespace ident.ID, 798 id ident.ID, 799 timestamp xtime.UnixNano, 800 value float64, 801 unit xtime.Unit, 802 annotation []byte, 803 ) error { 804 n, err := d.namespaceFor(namespace) 805 if err != nil { 806 d.metrics.unknownNamespaceWrite.Inc(1) 807 return err 808 } 809 810 seriesWrite, err := n.Write(ctx, id, timestamp, value, unit, annotation) 811 if err != nil { 812 return err 813 } 814 815 if !n.Options().WritesToCommitLog() || !seriesWrite.WasWritten { 816 return nil 817 } 818 819 dp := ts.Datapoint{ 820 TimestampNanos: timestamp, 821 Value: value, 822 } 823 824 return d.commitLog.Write(ctx, seriesWrite.Series, dp, unit, annotation) 825 } 826 827 func (d *db) WriteTagged( 828 ctx context.Context, 829 namespace ident.ID, 830 id ident.ID, 831 tagResolver convert.TagMetadataResolver, 832 timestamp xtime.UnixNano, 833 value float64, 834 unit xtime.Unit, 835 annotation []byte, 836 ) error { 837 n, err := d.namespaceFor(namespace) 838 if err != nil { 839 d.metrics.unknownNamespaceWriteTagged.Inc(1) 840 return err 841 } 842 843 seriesWrite, err := n.WriteTagged(ctx, id, tagResolver, timestamp, value, unit, annotation) 844 if err != nil { 845 return err 846 } 847 848 if !n.Options().WritesToCommitLog() || !seriesWrite.WasWritten { 849 return nil 850 } 851 852 dp := ts.Datapoint{ 853 TimestampNanos: timestamp, 854 Value: value, 855 } 856 857 return d.commitLog.Write(ctx, seriesWrite.Series, dp, unit, annotation) 858 } 859 860 func (d *db) BatchWriter(namespace ident.ID, batchSize int) (writes.BatchWriter, error) { 861 n, err := d.namespaceFor(namespace) 862 if err != nil { 863 d.metrics.unknownNamespaceBatchWriter.Inc(1) 864 return nil, err 865 } 866 867 var ( 868 nsID = n.ID() 869 batchWriter = d.writeBatchPool.Get() 870 ) 871 batchWriter.Reset(batchSize, nsID) 872 return batchWriter, nil 873 } 874 875 func (d *db) WriteBatch( 876 ctx context.Context, 877 namespace ident.ID, 878 writer writes.BatchWriter, 879 errHandler IndexedErrorHandler, 880 ) error { 881 return d.writeBatch(ctx, namespace, writer, errHandler, false) 882 } 883 884 func (d *db) WriteTaggedBatch( 885 ctx context.Context, 886 namespace ident.ID, 887 writer writes.BatchWriter, 888 errHandler IndexedErrorHandler, 889 ) error { 890 return d.writeBatch(ctx, namespace, writer, errHandler, true) 891 } 892 893 func (d *db) writeBatch( 894 ctx context.Context, 895 namespace ident.ID, 896 writer writes.BatchWriter, 897 errHandler IndexedErrorHandler, 898 tagged bool, 899 ) error { 900 n, err := d.namespaceFor(namespace) 901 if err != nil { 902 if tagged { 903 d.metrics.unknownNamespaceWriteTaggedBatch.Inc(1) 904 } else { 905 d.metrics.unknownNamespaceWriteBatch.Inc(1) 906 } 907 return err 908 } 909 910 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBWriteBatch) 911 if sampled { 912 sp.LogFields( 913 opentracinglog.String("namespace", namespace.String()), 914 opentracinglog.Bool("tagged", tagged), 915 ) 916 } 917 918 defer sp.Finish() 919 writes, ok := writer.(writes.WriteBatch) 920 if !ok { 921 return 
errWriterDoesNotImplementWriteBatch 922 } 923 924 iter := writes.Iter() 925 for i, write := range iter { 926 var ( 927 seriesWrite SeriesWrite 928 err error 929 ) 930 931 if tagged { 932 seriesWrite, err = n.WriteTagged( 933 ctx, 934 write.Write.Series.ID, 935 convert.NewEncodedTagsMetadataResolver(write.EncodedTags), 936 write.Write.Datapoint.TimestampNanos, 937 write.Write.Datapoint.Value, 938 write.Write.Unit, 939 write.Write.Annotation, 940 ) 941 } else { 942 seriesWrite, err = n.Write( 943 ctx, 944 write.Write.Series.ID, 945 write.Write.Datapoint.TimestampNanos, 946 write.Write.Datapoint.Value, 947 write.Write.Unit, 948 write.Write.Annotation, 949 ) 950 } 951 if err != nil { 952 // Return errors with the original index provided by the caller so they 953 // can associate the error with the write that caused it. 954 errHandler.HandleError(write.OriginalIndex, err) 955 writes.SetError(i, err) 956 continue 957 } 958 959 // Need to set the outcome in the success case so the commitlog gets the 960 // updated series object which contains identifiers (like the series ID) 961 // whose lifecycle lives longer than the span of this request, making them 962 // safe for use by the async commitlog. Need to set the outcome in the 963 // error case so that the commitlog knows to skip this entry. 964 writes.SetSeries(i, seriesWrite.Series) 965 966 if !seriesWrite.WasWritten { 967 // This series has no additional information that needs to be written to 968 // the commit log; set this series to skip writing to the commit log. 969 writes.SetSkipWrite(i) 970 } 971 972 if seriesWrite.NeedsIndex { 973 writes.SetPendingIndex(i, seriesWrite.PendingIndexInsert) 974 } 975 } 976 977 // Now insert all pending index inserts together in one go 978 // to limit lock contention. 979 if pending := writes.PendingIndex(); len(pending) > 0 { 980 err := n.WritePendingIndexInserts(pending) 981 if err != nil { 982 // Mark those as pending index with an error. 983 // Note: this is an invariant error, queueing should never fail 984 // when so it's fine to fail all these entries if we can't 985 // write pending index inserts. 986 for i, write := range iter { 987 if write.PendingIndex { 988 errHandler.HandleError(write.OriginalIndex, err) 989 writes.SetError(i, err) 990 } 991 } 992 } 993 } 994 995 if !n.Options().WritesToCommitLog() { 996 // Finalize here because we can't rely on the commitlog to do it since 997 // we're not using it. 998 writes.Finalize() 999 return nil 1000 } 1001 1002 return d.commitLog.WriteBatch(ctx, writes) 1003 } 1004 1005 func (d *db) QueryIDs( 1006 ctx context.Context, 1007 namespace ident.ID, 1008 query index.Query, 1009 opts index.QueryOptions, 1010 ) (index.QueryResult, error) { 1011 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBQueryIDs) 1012 if sampled { 1013 sp.LogFields( 1014 opentracinglog.String("query", query.String()), 1015 opentracinglog.String("namespace", namespace.String()), 1016 opentracinglog.Int("seriesLimit", opts.SeriesLimit), 1017 opentracinglog.Int("docsLimit", opts.DocsLimit), 1018 xopentracing.Time("start", opts.StartInclusive.ToTime()), 1019 xopentracing.Time("end", opts.EndExclusive.ToTime()), 1020 ) 1021 } 1022 defer sp.Finish() 1023 1024 // Check if exceeding query limits at very beginning of 1025 // query path to abandon as early as possible. 
1026 if err := d.queryLimits.AnyFetchExceeded(); err != nil { 1027 return index.QueryResult{}, err 1028 } 1029 1030 n, err := d.namespaceFor(namespace) 1031 if err != nil { 1032 sp.LogFields(opentracinglog.Error(err)) 1033 d.metrics.unknownNamespaceQueryIDs.Inc(1) 1034 return index.QueryResult{}, err 1035 } 1036 1037 return n.QueryIDs(ctx, query, opts) 1038 } 1039 1040 func (d *db) AggregateQuery( 1041 ctx context.Context, 1042 namespace ident.ID, 1043 query index.Query, 1044 aggResultOpts index.AggregationOptions, 1045 ) (index.AggregateQueryResult, error) { 1046 n, err := d.namespaceFor(namespace) 1047 if err != nil { 1048 d.metrics.unknownNamespaceQueryIDs.Inc(1) 1049 return index.AggregateQueryResult{}, err 1050 } 1051 1052 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBAggregateQuery) 1053 if sampled { 1054 sp.LogFields( 1055 opentracinglog.String("query", query.String()), 1056 opentracinglog.String("namespace", namespace.String()), 1057 opentracinglog.Int("seriesLimit", aggResultOpts.QueryOptions.SeriesLimit), 1058 opentracinglog.Int("docsLimit", aggResultOpts.QueryOptions.DocsLimit), 1059 xopentracing.Time("start", aggResultOpts.QueryOptions.StartInclusive.ToTime()), 1060 xopentracing.Time("end", aggResultOpts.QueryOptions.EndExclusive.ToTime()), 1061 ) 1062 } 1063 1064 defer sp.Finish() 1065 return n.AggregateQuery(ctx, query, aggResultOpts) 1066 } 1067 1068 func (d *db) ReadEncoded( 1069 ctx context.Context, 1070 namespace ident.ID, 1071 id ident.ID, 1072 start, end xtime.UnixNano, 1073 ) (series.BlockReaderIter, error) { 1074 n, err := d.namespaceFor(namespace) 1075 if err != nil { 1076 d.metrics.unknownNamespaceRead.Inc(1) 1077 return nil, err 1078 } 1079 1080 return n.ReadEncoded(ctx, id, start, end) 1081 } 1082 1083 func (d *db) FetchBlocks( 1084 ctx context.Context, 1085 namespace ident.ID, 1086 shardID uint32, 1087 id ident.ID, 1088 starts []xtime.UnixNano, 1089 ) ([]block.FetchBlockResult, error) { 1090 n, err := d.namespaceFor(namespace) 1091 if err != nil { 1092 d.metrics.unknownNamespaceFetchBlocks.Inc(1) 1093 return nil, xerrors.NewInvalidParamsError(err) 1094 } 1095 1096 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBFetchBlocks) 1097 if sampled { 1098 sp.LogFields( 1099 opentracinglog.String("namespace", namespace.String()), 1100 opentracinglog.Uint32("shardID", shardID), 1101 opentracinglog.String("id", id.String()), 1102 ) 1103 } 1104 1105 defer sp.Finish() 1106 return n.FetchBlocks(ctx, shardID, id, starts) 1107 } 1108 1109 func (d *db) FetchBlocksMetadataV2( 1110 ctx context.Context, 1111 namespace ident.ID, 1112 shardID uint32, 1113 start, end xtime.UnixNano, 1114 limit int64, 1115 pageToken PageToken, 1116 opts block.FetchBlocksMetadataOptions, 1117 ) (block.FetchBlocksMetadataResults, PageToken, error) { 1118 n, err := d.namespaceFor(namespace) 1119 if err != nil { 1120 d.metrics.unknownNamespaceFetchBlocksMetadata.Inc(1) 1121 return nil, nil, xerrors.NewInvalidParamsError(err) 1122 } 1123 1124 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBFetchBlocksMetadataV2) 1125 if sampled { 1126 sp.LogFields( 1127 opentracinglog.String("namespace", namespace.String()), 1128 opentracinglog.Uint32("shardID", shardID), 1129 xopentracing.Time("start", start.ToTime()), 1130 xopentracing.Time("end", end.ToTime()), 1131 opentracinglog.Int64("limit", limit), 1132 ) 1133 } 1134 1135 defer sp.Finish() 1136 return n.FetchBlocksMetadataV2(ctx, shardID, start, end, limit, 1137 pageToken, opts) 1138 } 1139 1140 func (d *db) Bootstrap() error { 1141 
d.Lock() 1142 d.bootstraps++ 1143 d.Unlock() 1144 1145 // NB: We need to acquire bootstrapMutex to protect from receiving new shardSets or namespaces during 1146 // bootstrapping. 1147 d.bootstrapMutex.Lock() 1148 _, err := d.mediator.Bootstrap() 1149 d.bootstrapMutex.Unlock() 1150 return err 1151 } 1152 1153 func (d *db) IsBootstrapped() bool { 1154 return d.mediator.IsBootstrapped() 1155 } 1156 1157 // IsBootstrappedAndDurable should only return true if the following conditions are met: 1158 // 1. The database is bootstrapped. 1159 // 2. The last successful snapshot began AFTER the last bootstrap completed. 1160 // 1161 // Those two conditions should be sufficient to ensure that after a placement change the 1162 // node will be able to bootstrap any and all data from its local disk, however, for posterity 1163 // we also perform the following check: 1164 // 3. The last bootstrap completed AFTER the shardset was last assigned. 1165 func (d *db) IsBootstrappedAndDurable() bool { 1166 isBootstrapped := d.mediator.IsBootstrapped() 1167 if !isBootstrapped { 1168 d.log.Debug("not bootstrapped and durable because: not bootstrapped") 1169 return false 1170 } 1171 1172 lastBootstrapCompletionTimeNano, ok := d.mediator.LastBootstrapCompletionTime() 1173 if !ok { 1174 d.log.Debug("not bootstrapped and durable because: no last bootstrap completion time", 1175 zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTimeNano.ToTime())) 1176 1177 return false 1178 } 1179 1180 lastSnapshotStartTime, ok := d.mediator.LastSuccessfulSnapshotStartTime() 1181 if !ok { 1182 d.log.Debug("not bootstrapped and durable because: no last snapshot start time", 1183 zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTimeNano.ToTime()), 1184 zap.Time("lastSnapshotStartTime", lastSnapshotStartTime.ToTime()), 1185 ) 1186 return false 1187 } 1188 1189 var ( 1190 lastBootstrapCompletionTime = lastBootstrapCompletionTimeNano.ToTime() 1191 hasSnapshottedPostBootstrap = lastSnapshotStartTime.After(lastBootstrapCompletionTimeNano) 1192 hasBootstrappedSinceReceivingNewShards = lastBootstrapCompletionTime.After(d.lastReceivedNewShards) || 1193 lastBootstrapCompletionTime.Equal(d.lastReceivedNewShards) 1194 isBootstrappedAndDurable = hasSnapshottedPostBootstrap && 1195 hasBootstrappedSinceReceivingNewShards 1196 ) 1197 1198 if !isBootstrappedAndDurable { 1199 d.log.Debug( 1200 "not bootstrapped and durable because: has not snapshotted post bootstrap and/or has not bootstrapped since receiving new shards", 1201 zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTime), 1202 zap.Time("lastSnapshotStartTime", lastSnapshotStartTime.ToTime()), 1203 zap.Time("lastReceivedNewShards", d.lastReceivedNewShards), 1204 ) 1205 return false 1206 } 1207 1208 return true 1209 } 1210 1211 func (d *db) Repair() error { 1212 return d.repairer.Repair() 1213 } 1214 1215 func (d *db) Truncate(namespace ident.ID) (int64, error) { 1216 n, err := d.namespaceFor(namespace) 1217 if err != nil { 1218 return 0, err 1219 } 1220 return n.Truncate() 1221 } 1222 1223 func (d *db) IsOverloaded() bool { 1224 queueSize := float64(d.commitLog.QueueLength()) 1225 queueCapacity := float64(d.opts.CommitLogOptions().BacklogQueueSize()) 1226 return queueSize >= commitLogQueueCapacityOverloadedFactor*queueCapacity 1227 } 1228 1229 func (d *db) BootstrapState() DatabaseBootstrapState { 1230 nsBootstrapStates := NamespaceBootstrapStates{} 1231 1232 d.RLock() 1233 for _, n := range d.namespaces.Iter() { 1234 ns := n.Value() 1235 
nsBootstrapStates[ns.ID().String()] = ns.ShardBootstrapState() 1236 } 1237 d.RUnlock() 1238 1239 return DatabaseBootstrapState{ 1240 NamespaceBootstrapStates: nsBootstrapStates, 1241 } 1242 } 1243 1244 func (d *db) FlushState( 1245 namespace ident.ID, 1246 shardID uint32, 1247 blockStart xtime.UnixNano, 1248 ) (fileOpState, error) { 1249 n, err := d.namespaceFor(namespace) 1250 if err != nil { 1251 return fileOpState{}, err 1252 } 1253 return n.FlushState(shardID, blockStart) 1254 } 1255 1256 func (d *db) namespaceFor(namespace ident.ID) (databaseNamespace, error) { 1257 d.RLock() 1258 n, exists := d.namespaces.Get(namespace) 1259 d.RUnlock() 1260 1261 if !exists { 1262 return nil, dberrors.NewUnknownNamespaceError(namespace.String()) 1263 } 1264 return n, nil 1265 } 1266 1267 func (d *db) ownedNamespacesWithLock() []databaseNamespace { 1268 namespaces := make([]databaseNamespace, 0, d.namespaces.Len()) 1269 for _, n := range d.namespaces.Iter() { 1270 namespaces = append(namespaces, n.Value()) 1271 } 1272 return namespaces 1273 } 1274 1275 func (d *db) OwnedNamespaces() ([]databaseNamespace, error) { 1276 d.RLock() 1277 defer d.RUnlock() 1278 if d.state == databaseClosed { 1279 return nil, errDatabaseIsClosed 1280 } 1281 return d.ownedNamespacesWithLock(), nil 1282 } 1283 1284 func (d *db) AggregateTiles( 1285 ctx context.Context, 1286 sourceNsID, 1287 targetNsID ident.ID, 1288 opts AggregateTilesOptions, 1289 ) (int64, error) { 1290 jobInProgress := opts.InsOptions.MetricsScope().Gauge("aggregations-in-progress") 1291 atomic.AddInt32(&aggregationsInProgress, 1) 1292 jobInProgress.Update(float64(aggregationsInProgress)) 1293 defer func() { 1294 atomic.AddInt32(&aggregationsInProgress, -1) 1295 jobInProgress.Update(float64(aggregationsInProgress)) 1296 }() 1297 1298 ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBAggregateTiles) 1299 if sampled { 1300 sp.LogFields( 1301 opentracinglog.String("sourceNamespace", sourceNsID.String()), 1302 opentracinglog.String("targetNamespace", targetNsID.String()), 1303 xopentracing.Time("start", opts.Start.ToTime()), 1304 xopentracing.Time("end", opts.End.ToTime()), 1305 xopentracing.Duration("step", opts.Step), 1306 ) 1307 } 1308 defer sp.Finish() 1309 1310 sourceNs, err := d.namespaceFor(sourceNsID) 1311 if err != nil { 1312 d.metrics.unknownNamespaceRead.Inc(1) 1313 return 0, err 1314 } 1315 1316 targetNs, err := d.namespaceFor(targetNsID) 1317 if err != nil { 1318 d.metrics.unknownNamespaceRead.Inc(1) 1319 return 0, err 1320 } 1321 1322 processedTileCount, err := targetNs.AggregateTiles(ctx, sourceNs, opts) 1323 if err != nil { 1324 d.log.Error("error writing large tiles", 1325 zap.String("sourceNs", sourceNsID.String()), 1326 zap.String("targetNs", targetNsID.String()), 1327 zap.Error(err), 1328 ) 1329 reportAggregateTilesErrors(opts.InsOptions.MetricsScope(), err) 1330 } 1331 1332 return processedTileCount, err 1333 } 1334 1335 func (d *db) nextIndex() uint64 { 1336 // Start with index at "1" so that a default "uniqueIndex" 1337 // with "0" is invalid (AddUint64 will return the new value). 
1338 return atomic.AddUint64(&d.created, 1) 1339 } 1340 1341 type tsIDs []ident.ID 1342 1343 func (t tsIDs) String() (string, error) { 1344 var buf bytes.Buffer 1345 buf.WriteRune('[') 1346 for idx, id := range t { 1347 if idx != 0 { 1348 if _, err := buf.WriteString(", "); err != nil { 1349 return "", err 1350 } 1351 } 1352 if _, err := buf.WriteString(id.String()); err != nil { 1353 return "", err 1354 } 1355 } 1356 buf.WriteRune(']') 1357 return buf.String(), nil 1358 } 1359 1360 type metadatas []namespace.Metadata 1361 1362 func (m metadatas) String() (string, error) { 1363 var buf bytes.Buffer 1364 buf.WriteRune('[') 1365 for idx, md := range m { 1366 if idx != 0 { 1367 if _, err := buf.WriteString(", "); err != nil { 1368 return "", err 1369 } 1370 } 1371 if _, err := buf.WriteString(md.ID().String()); err != nil { 1372 return "", err 1373 } 1374 } 1375 buf.WriteRune(']') 1376 return buf.String(), nil 1377 } 1378 1379 // NewAggregateTilesOptions creates new AggregateTilesOptions. 1380 func NewAggregateTilesOptions( 1381 start, end xtime.UnixNano, 1382 step time.Duration, 1383 targetNsID ident.ID, 1384 process AggregateTilesProcess, 1385 memorizeMetricTypes, backfillMetricTypes bool, 1386 metricTypeByName map[string]annotation.Payload, 1387 insOpts instrument.Options, 1388 ) (AggregateTilesOptions, error) { 1389 if !end.After(start) { 1390 return AggregateTilesOptions{}, fmt.Errorf("AggregateTilesOptions.End must be after Start, got %s - %s", start, end) 1391 } 1392 1393 if step <= 0 { 1394 return AggregateTilesOptions{}, fmt.Errorf("AggregateTilesOptions.Step must be positive, got %s", step) 1395 } 1396 1397 if (memorizeMetricTypes || backfillMetricTypes) && metricTypeByName == nil { 1398 return AggregateTilesOptions{}, errors.New( 1399 "metricTypeByName must not be nil when memorizeMetricTypes or backfillMetricTypes is true") 1400 } 1401 1402 scope := insOpts.MetricsScope().SubScope("computed-namespace") 1403 insOpts = insOpts.SetMetricsScope(scope.Tagged(map[string]string{ 1404 "target-namespace": targetNsID.String(), 1405 "process": process.String(), 1406 })) 1407 1408 return AggregateTilesOptions{ 1409 Start: start, 1410 End: end, 1411 Step: step, 1412 Process: process, 1413 1414 MemorizeMetricTypes: memorizeMetricTypes, 1415 BackfillMetricTypes: backfillMetricTypes, 1416 MetricTypeByName: metricTypeByName, 1417 1418 InsOptions: insOpts, 1419 }, nil 1420 } 1421 1422 func reportAggregateTilesErrors(scope tally.Scope, err error) { 1423 errorType := "not-categorized" 1424 if xerrors.Is(err, client.ErrSessionStatusNotOpen) { 1425 errorType = "connection-to-peer" 1426 } 1427 scope.Tagged(map[string]string{"error-type": errorType}).Counter("aggregate-tiles-failed").Inc(1) 1428 }
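// Typical lifecycle of a database built by NewDatabase, shown as an illustrative sketch only: the
// shard set and options construction (sharding.NewShards, sharding.NewShardSet, sharding.DefaultHashFn,
// NewOptions), the x/context constructor, and the "metrics"/"cpu.user" IDs below are assumptions for
// illustration, and error handling is elided; in a real deployment the dbnode server performs this wiring.
//
//	shards := sharding.NewShards([]uint32{0, 1, 2, 3}, shard.Available) // assumed test-style shard construction
//	shardSet, _ := sharding.NewShardSet(shards, sharding.DefaultHashFn(len(shards)))
//
//	d, _ := NewDatabase(shardSet, NewOptions()) // NewOptions assumed to supply usable defaults
//	_ = d.Open()      // starts the namespace watch, wired list and mediator
//	_ = d.Bootstrap() // initial bootstrap; later shard/namespace updates re-enqueue bootstraps
//
//	ctx := context.NewContext() // x/context constructor assumed
//	defer ctx.Close()
//	_ = d.Write(ctx, ident.StringID("metrics"), ident.StringID("cpu.user"),
//		xtime.ToUnixNano(time.Now()), 0.42, xtime.Second, nil)
//
//	_ = d.Close()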