github.com/m3db/m3@v1.5.0/src/dbnode/storage/database.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package storage

import (
	"bytes"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	opentracinglog "github.com/opentracing/opentracing-go/log"
	"github.com/uber-go/tally"
	"go.uber.org/zap"

	"github.com/m3db/m3/src/cluster/shard"
	"github.com/m3db/m3/src/dbnode/client"
	"github.com/m3db/m3/src/dbnode/generated/proto/annotation"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist/fs/commitlog"
	"github.com/m3db/m3/src/dbnode/sharding"
	"github.com/m3db/m3/src/dbnode/storage/block"
	dberrors "github.com/m3db/m3/src/dbnode/storage/errors"
	"github.com/m3db/m3/src/dbnode/storage/index"
	"github.com/m3db/m3/src/dbnode/storage/index/convert"
	"github.com/m3db/m3/src/dbnode/storage/limits"
	"github.com/m3db/m3/src/dbnode/storage/series"
	"github.com/m3db/m3/src/dbnode/tracepoint"
	"github.com/m3db/m3/src/dbnode/ts"
	"github.com/m3db/m3/src/dbnode/ts/writes"
	"github.com/m3db/m3/src/x/clock"
	"github.com/m3db/m3/src/x/context"
	xerrors "github.com/m3db/m3/src/x/errors"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/instrument"
	xopentracing "github.com/m3db/m3/src/x/opentracing"
	xtime "github.com/m3db/m3/src/x/time"
)

const (
	// The database is considered overloaded if the queue size is 90% or more
	// of the maximum capacity. We set this below 1.0 because checking the
	// queue length is racy, so we may burst past this threshold anyway; the
	// buffer gives us breathing room to recover.
	commitLogQueueCapacityOverloadedFactor = 0.9
)

var (
	// errDatabaseAlreadyOpen raised when trying to open a database that is already open.
	errDatabaseAlreadyOpen = errors.New("database is already open")

	// errDatabaseNotOpen raised when trying to close a database that is not open.
	errDatabaseNotOpen = errors.New("database is not open")

	// errDatabaseAlreadyClosed raised when trying to open a database that is already closed.
	errDatabaseAlreadyClosed = errors.New("database is already closed")

	// errDatabaseIsClosed raised when trying to perform an action that requires an open database.
	errDatabaseIsClosed = errors.New("database is closed")

	// errWriterDoesNotImplementWriteBatch is raised when the provided ts.BatchWriter does not implement
	// ts.WriteBatch.
	errWriterDoesNotImplementWriteBatch = errors.New("provided writer does not implement ts.WriteBatch")
	aggregationsInProgress              int32
)

type databaseState int

const (
	databaseNotOpen databaseState = iota
	databaseOpen
	databaseClosed
)

// increasingIndex provides a monotonically increasing index for new series.
type increasingIndex interface {
	nextIndex() uint64
}

type db struct {
	sync.RWMutex
	bootstrapMutex sync.Mutex
	opts           Options
	nowFn          clock.NowFn

	nsWatch                namespace.NamespaceWatch
	namespaces             *databaseNamespacesMap
	runtimeOptionsRegistry namespace.RuntimeOptionsManagerRegistry

	commitLog commitlog.CommitLog

	state    databaseState
	mediator databaseMediator
	repairer databaseRepairer

	created    uint64
	bootstraps int

	shardSet              sharding.ShardSet
	lastReceivedNewShards time.Time

	scope   tally.Scope
	metrics databaseMetrics
	log     *zap.Logger

	writeBatchPool *writes.WriteBatchPool

	queryLimits limits.QueryLimits
}

type databaseMetrics struct {
	unknownNamespaceRead                tally.Counter
	unknownNamespaceWrite               tally.Counter
	unknownNamespaceWriteTagged         tally.Counter
	unknownNamespaceBatchWriter         tally.Counter
	unknownNamespaceWriteBatch          tally.Counter
	unknownNamespaceWriteTaggedBatch    tally.Counter
	unknownNamespaceFetchBlocks         tally.Counter
	unknownNamespaceFetchBlocksMetadata tally.Counter
	unknownNamespaceQueryIDs            tally.Counter
	errQueryIDsIndexDisabled            tally.Counter
	errWriteTaggedIndexDisabled         tally.Counter
	pendingNamespaceChange              tally.Gauge
}

func newDatabaseMetrics(scope tally.Scope) databaseMetrics {
	unknownNamespaceScope := scope.SubScope("unknown-namespace")
	indexDisabledScope := scope.SubScope("index-disabled")
	return databaseMetrics{
		unknownNamespaceRead:                unknownNamespaceScope.Counter("read"),
		unknownNamespaceWrite:               unknownNamespaceScope.Counter("write"),
		unknownNamespaceWriteTagged:         unknownNamespaceScope.Counter("write-tagged"),
		unknownNamespaceBatchWriter:         unknownNamespaceScope.Counter("batch-writer"),
		unknownNamespaceWriteBatch:          unknownNamespaceScope.Counter("write-batch"),
		unknownNamespaceWriteTaggedBatch:    unknownNamespaceScope.Counter("write-tagged-batch"),
		unknownNamespaceFetchBlocks:         unknownNamespaceScope.Counter("fetch-blocks"),
		unknownNamespaceFetchBlocksMetadata: unknownNamespaceScope.Counter("fetch-blocks-metadata"),
		unknownNamespaceQueryIDs:            unknownNamespaceScope.Counter("query-ids"),
		errQueryIDsIndexDisabled:            indexDisabledScope.Counter("err-query-ids"),
		errWriteTaggedIndexDisabled:         indexDisabledScope.Counter("err-write-tagged"),
		pendingNamespaceChange:              scope.Gauge("pending-namespace-change"),
	}
}

// NewDatabase creates a new time series database.
func NewDatabase(
	shardSet sharding.ShardSet,
	opts Options,
) (Database, error) {
	if err := opts.Validate(); err != nil {
		return nil, fmt.Errorf("invalid options: %v", err)
	}

	commitLog, err := commitlog.NewCommitLog(opts.CommitLogOptions())
	if err != nil {
		return nil, err
	}
	if err := commitLog.Open(); err != nil {
		return nil, err
	}

	var (
		iopts  = opts.InstrumentOptions()
		scope  = iopts.MetricsScope().SubScope("database")
		logger = iopts.Logger()
		nowFn  = opts.ClockOptions().NowFn()
	)

	d := &db{
		opts:                   opts,
		nowFn:                  nowFn,
		shardSet:               shardSet,
		lastReceivedNewShards:  nowFn(),
		namespaces:             newDatabaseNamespacesMap(databaseNamespacesMapOptions{}),
		runtimeOptionsRegistry: opts.NamespaceRuntimeOptionsManagerRegistry(),
		commitLog:              commitLog,
		scope:                  scope,
		metrics:                newDatabaseMetrics(scope),
		log:                    logger,
		writeBatchPool:         opts.WriteBatchPool(),
		queryLimits:            opts.IndexOptions().QueryLimits(),
	}

	databaseIOpts := iopts.SetMetricsScope(scope)

	// initialize namespaces
	nsInit := opts.NamespaceInitializer()

	logger.Info("creating namespaces watch")
	nsReg, err := nsInit.Init()
	if err != nil {
		return nil, err
	}

	// get a namespace watch
	watch, err := nsReg.Watch()
	if err != nil {
		return nil, err
	}

	// Wait till the first namespaces value is received and set the value.
	// It's important that this happens before the mediator is started to prevent
	// a race condition where the namespaces haven't been initialized yet and
	// OwnedNamespaces() returns an empty slice, which makes the cleanup logic
	// in the background Tick think it can clean up files that it shouldn't.
	logger.Info("resolving namespaces with namespace watch")
	<-watch.C()
	dbUpdater := func(namespaces namespace.Map) error {
		return d.UpdateOwnedNamespaces(namespaces)
	}
	d.nsWatch = namespace.NewNamespaceWatch(dbUpdater, watch, databaseIOpts)
	nsMap := watch.Get()
	if err := d.UpdateOwnedNamespaces(nsMap); err != nil {
		// Log the error and proceed in case some namespace is misconfigured, e.g. missing schema.
		// A misconfigured namespace won't be initialized, but it should not prevent the database
		// or other namespaces from being initialized.
		d.log.Error("failed to update owned namespaces",
			zap.Error(err))
	}

	d.mediator, err = newMediator(
		d, commitLog, opts.SetInstrumentOptions(databaseIOpts))
	if err != nil {
		return nil, err
	}

	d.repairer = newNoopDatabaseRepairer()
	if opts.RepairEnabled() {
		d.repairer, err = newDatabaseRepairer(d, opts)
		if err != nil {
			return nil, err
		}
		err = d.mediator.RegisterBackgroundProcess(d.repairer)
		if err != nil {
			return nil, err
		}
	}

	for _, fn := range opts.BackgroundProcessFns() {
		process, err := fn(d, opts)
		if err != nil {
			return nil, err
		}
		err = d.mediator.RegisterBackgroundProcess(process)
		if err != nil {
			return nil, err
		}
	}

	return d, nil
}
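
// A minimal usage sketch of NewDatabase (illustrative caller code, not part
// of this file; assumes the caller has built a shardSet, e.g. via
// sharding.NewShardSet, and fully populated Options):
//
//	db, err := NewDatabase(shardSet, opts)
//	if err != nil {
//		logger.Fatal("unable to create database", zap.Error(err))
//	}
//	if err := db.Open(); err != nil {
//		logger.Fatal("unable to open database", zap.Error(err))
//	}
//	defer db.Close()
//
// Note that NewDatabase opens the commit log and blocks on the namespace
// watch until the first namespaces value is received, so construction can
// stall if the namespace registry is unavailable.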

func (d *db) UpdateOwnedNamespaces(newNamespaces namespace.Map) error {
	if newNamespaces == nil {
		return nil
	}
	// NB: Use bootstrapMutex to protect from competing calls.
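	// asyncUnlock defers releasing bootstrapMutex to the completion callback
	// of an asynchronously enqueued bootstrap (see enqueueBootstrapAsync and
	// enqueueBootstrapAsyncWithLock below); otherwise the deferred func
	// releases it on return.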
	asyncUnlock := false
	d.bootstrapMutex.Lock()
	defer func() {
		if !asyncUnlock {
			d.bootstrapMutex.Unlock()
		}
	}()

	// Always update schema registry before owned namespaces.
	if err := namespace.UpdateSchemaRegistry(newNamespaces, d.opts.SchemaRegistry(), d.log); err != nil {
		// Log the schema update error and proceed.
		// In a multi-namespace database, a schema update failure for one namespace should be isolated.
		d.log.Error("failed to update schema registry", zap.Error(err))
	}

	// Always update the runtime options if they were set so that correct
	// runtime options are set in the runtime options registry before namespaces
	// are actually created.
	for _, namespaceMetadata := range newNamespaces.Metadatas() {
		id := namespaceMetadata.ID().String()
		runtimeOptsMgr := d.runtimeOptionsRegistry.RuntimeOptionsManager(id)
		currRuntimeOpts := runtimeOptsMgr.Get()
		setRuntimeOpts := namespaceMetadata.Options().RuntimeOptions()
		if !currRuntimeOpts.Equal(setRuntimeOpts) {
			runtimeOptsMgr.Update(setRuntimeOpts)
		}
	}

	// NB: Can hold lock since all long-running tasks are enqueued to run
	// async while holding the lock.
	d.Lock()
	defer d.Unlock()

	removes, adds, updates := d.namespaceDeltaWithLock(newNamespaces)
	if err := d.logNamespaceUpdate(removes, adds, updates); err != nil {
		d.log.Error("unable to log namespace updates", zap.Error(err))
		return err
	}

	// log that updates and removals are skipped
	if len(removes) > 0 || len(updates) > 0 {
		d.metrics.pendingNamespaceChange.Update(1)
		d.log.Warn("skipping namespace removals and updates " +
			"(except schema updates and runtime options), " +
			"restart the process if you want changes to take effect")
	}

	if len(adds) > 0 {
		if d.bootstraps == 0 || !d.mediatorIsOpenWithLock() {
			// If there have been no bootstraps yet, or the mediator is not
			// open, we can just add the namespaces and optionally enqueue a
			// bootstrap (which is async), because no file operations can be
			// in flight while nothing has bootstrapped and/or the mediator
			// is not open.
			if err := d.addNamespacesWithLock(adds); err != nil {
				d.log.Error("unable to add namespaces", zap.Error(err))
				return err
			}

			if d.bootstraps > 0 {
				// If already bootstrapped before, enqueue another
				// bootstrap (asynchronously, ok to trigger holding lock).
				asyncUnlock = true
				d.enqueueBootstrapAsync(d.bootstrapMutex.Unlock)
			}

			return nil
		}

		// NB: The mediator is open, so we need to disable file ops and wait
		// for all the background processes to complete so that we can update
		// the namespaces safely. Otherwise, there is a high chance of getting
		// an invariant violation panic because cold/warm flush will receive
		// new namespaces in the middle of their operations.
		d.Unlock() // Don't hold the lock while we wait for file ops.
		d.disableFileOpsAndWait()
		d.Lock() // Reacquire lock after waiting.

		// Add any namespaces marked for addition.
		if err := d.addNamespacesWithLock(adds); err != nil {
			d.log.Error("unable to add namespaces", zap.Error(err))
			d.enableFileOps()
			return err
		}

		// Enqueue bootstrap and enable file ops when bootstrap is completed.
		asyncUnlock = true
		d.enqueueBootstrapAsyncWithLock(
			func() {
				d.enableFileOps()
				d.bootstrapMutex.Unlock()
			})
	}
	return nil
}

func (d *db) mediatorIsOpenWithLock() bool {
	if d.mediator == nil {
		return false
	}
	return d.mediator.IsOpen()
}

func (d *db) disableFileOpsAndWait() {
	if mediator := d.mediator; mediator != nil && mediator.IsOpen() {
		d.log.Info("waiting for file ops to be disabled")
		mediator.DisableFileOpsAndWait()
	}
}

func (d *db) enableFileOps() {
	if mediator := d.mediator; mediator != nil && mediator.IsOpen() {
		d.log.Info("enabling file ops")
		mediator.EnableFileOps()
	}
}

func (d *db) namespaceDeltaWithLock(
	newNamespaces namespace.Map,
) ([]ident.ID, []namespace.Metadata, []namespace.Metadata) {
	var (
		existing = d.namespaces
		removes  []ident.ID
		adds     []namespace.Metadata
		updates  []namespace.Metadata
	)

	// check if existing namespaces exist in newNamespaces
	for _, entry := range existing.Iter() {
		ns := entry.Value()
		newMd, err := newNamespaces.Get(ns.ID())
		// if a namespace doesn't exist in newNamespaces, mark for removal
		if err != nil {
			removes = append(removes, ns.ID())
			continue
		}

		// if namespace exists in newNamespaces, check if options are the same
		optionsSame := newMd.Options().Equal(ns.Options())

		// if options are the same, we don't need to do anything
		if optionsSame {
			continue
		}

		// if options are not the same, we mark for updates
		updates = append(updates, newMd)
	}

	// check for any namespaces that need to be added
	for _, ns := range newNamespaces.Metadatas() {
		_, exists := d.namespaces.Get(ns.ID())
		if !exists {
			adds = append(adds, ns)
		}
	}

	return removes, adds, updates
}

func (d *db) logNamespaceUpdate(removes []ident.ID, adds, updates []namespace.Metadata) error {
	removalString, err := tsIDs(removes).String()
	if err != nil {
		return fmt.Errorf("unable to format removal, err = %v", err)
	}

	addString, err := metadatas(adds).String()
	if err != nil {
		return fmt.Errorf("unable to format adds, err = %v", err)
	}

	updateString, err := metadatas(updates).String()
	if err != nil {
		return fmt.Errorf("unable to format updates, err = %v", err)
	}

	// log scheduled operation
	d.log.Info("updating database namespaces",
		zap.String("adds", addString),
		zap.String("updates", updateString),
		zap.String("removals", removalString),
	)

	// NB(prateek): as noted in `UpdateOwnedNamespaces()` above, the current implementation
	// does not apply updates and removals until the m3dbnode process is restarted.

	return nil
}

func (d *db) addNamespacesWithLock(namespaces []namespace.Metadata) error {
	createdNamespaces := make([]databaseNamespace, 0, len(namespaces))

	for _, n := range namespaces {
		// ensure namespace doesn't exist
		_, ok := d.namespaces.Get(n.ID())
		if ok { // should never happen
			return fmt.Errorf("existing namespace marked for addition: %v", n.ID().String())
		}

		// create and add to the database
		newNs, err := d.newDatabaseNamespaceWithLock(n)
		if err != nil {
			return err
		}
		d.namespaces.Set(n.ID(), newNs)
		createdNamespaces = append(createdNamespaces, newNs)
	}

	hooks := d.Options().NamespaceHooks()
	for _, ns := range createdNamespaces {
		err := hooks.OnCreatedNamespace(ns, d.getNamespaceWithLock)
		if err != nil {
			return err
		}
	}

	return nil
}

func (d *db) getNamespaceWithLock(id ident.ID) (Namespace, bool) {
	return d.namespaces.Get(id)
}

func (d *db) newDatabaseNamespaceWithLock(
	md namespace.Metadata,
) (databaseNamespace, error) {
	var (
		retriever block.DatabaseBlockRetriever
		err       error
	)
	if mgr := d.opts.DatabaseBlockRetrieverManager(); mgr != nil {
		retriever, err = mgr.Retriever(md, d.shardSet)
		if err != nil {
			return nil, err
		}
	}
	nsID := md.ID().String()
	runtimeOptsMgr := d.runtimeOptionsRegistry.RuntimeOptionsManager(nsID)
	return newDatabaseNamespace(md, runtimeOptsMgr,
		d.shardSet, retriever, d, d.commitLog, d.opts)
}

func (d *db) Options() Options {
	// Options are immutable; it is safe to return the current reference.
	return d.opts
}

func (d *db) AssignShardSet(shardSet sharding.ShardSet) {
	// NB: Use bootstrapMutex to protect from competing calls.
	d.bootstrapMutex.Lock()
	asyncUnlock := false
	defer func() {
		if !asyncUnlock {
			// Unlock only if asyncUnlock is not set. Otherwise, we will unlock asynchronously.
			d.bootstrapMutex.Unlock()
		}
	}()
	// NB: Can hold lock since all long-running tasks are enqueued to run
	// async while holding the lock.
	d.Lock()
	defer d.Unlock()

	added, removed, updated := d.shardsDeltaWithLock(shardSet)

	if !added && !removed && !updated {
		d.log.Info("received identical shardSet, skipping shard assignment")
		return
	}

	if added {
		d.lastReceivedNewShards = d.nowFn()
	}

	if d.bootstraps == 0 || !d.mediatorIsOpenWithLock() {
		// If not bootstrapped before, or the mediator is not open, then we
		// can just immediately assign shards.
		d.assignShardsWithLock(shardSet)
		if d.bootstraps > 0 {
			// If already bootstrapped before, enqueue another
			// bootstrap (asynchronously, ok to trigger holding lock).
			asyncUnlock = true
			d.enqueueBootstrapAsync(d.bootstrapMutex.Unlock)
		}
		return
	}

	if added {
		// Wait outside of holding lock to disable file operations.
		d.Unlock()
		d.disableFileOpsAndWait()
		d.Lock()
	}

	d.assignShardsWithLock(shardSet)

	if added {
		asyncUnlock = true
		d.enqueueBootstrapAsyncWithLock(func() {
			d.enableFileOps()
			d.bootstrapMutex.Unlock()
		})
	}
}

func (d *db) assignShardsWithLock(shardSet sharding.ShardSet) {
	d.log.Info("assigning shards", zap.Uint32s("shards", shardSet.AllIDs()))
	d.shardSet = shardSet
	for _, elem := range d.namespaces.Iter() {
		ns := elem.Value()
		ns.AssignShardSet(shardSet)
	}
}

func (d *db) shardsDeltaWithLock(incoming sharding.ShardSet) (bool, bool, bool) {
	var (
		existing       = d.shardSet
		existingShards = existing.All()
		incomingShards = incoming.All()
		existingSet    = make(map[uint32]shard.Shard, len(existingShards))
		incomingSet    = make(map[uint32]shard.Shard, len(incomingShards))
		added          bool
		removed        bool
		updated        bool
	)

	for _, shard := range existingShards {
		existingSet[shard.ID()] = shard
	}

	for _, shard := range incomingShards {
		incomingSet[shard.ID()] = shard
		existingShard, ok := existingSet[shard.ID()]
		if !ok {
			added = true
		} else if !existingShard.Equals(shard) {
			updated = true
		}
	}

	for shardID := range existingSet {
		_, ok := incomingSet[shardID]
		if !ok {
			removed = true
			break
		}
	}

	return added, removed, updated
}
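
// As a worked example of shardsDeltaWithLock: if the existing shard set is
// {1, 2} and the incoming shard set is {2, 3}, where shard 2's state has also
// changed (for example Initializing -> Available), then the new shard 3
// yields added=true, the now-absent shard 1 yields removed=true, and the
// non-equal shard 2 yields updated=true.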

func (d *db) hasReceivedNewShardsWithLock(incoming sharding.ShardSet) bool {
	var (
		existing    = d.shardSet
		existingSet = make(map[uint32]struct{}, len(existing.AllIDs()))
	)

	for _, shard := range existing.AllIDs() {
		existingSet[shard] = struct{}{}
	}

	receivedNewShards := false
	for _, shard := range incoming.AllIDs() {
		_, ok := existingSet[shard]
		if !ok {
			receivedNewShards = true
			break
		}
	}

	return receivedNewShards
}

func (d *db) ShardSet() sharding.ShardSet {
	d.RLock()
	defer d.RUnlock()
	return d.shardSet
}

func (d *db) enqueueBootstrapAsync(onCompleteFn func()) {
	d.log.Info("enqueuing bootstrap")
	d.mediator.BootstrapEnqueue(BootstrapEnqueueOptions{
		OnCompleteFn: func(_ BootstrapResult) {
			onCompleteFn()
		},
	})
}

func (d *db) enqueueBootstrapAsyncWithLock(onCompleteFn func()) {
	// Only perform a bootstrap if at least one bootstrap has already occurred. This allows
	// the clustered database to be opened and shardsets to be assigned to the non-clustered
	// database when it receives an initial topology (as well as topology changes) without
	// triggering a bootstrap until an external call initiates one with an initial call to
	// Bootstrap(). After that initial bootstrap, the clustered database will keep
	// the non-clustered database bootstrapped by assigning it shardsets which will trigger new
	// bootstraps since d.bootstraps > 0 will be true.
	if d.bootstraps > 0 {
		d.log.Info("enqueuing bootstrap with onComplete function")
		d.mediator.BootstrapEnqueue(BootstrapEnqueueOptions{
			OnCompleteFn: func(_ BootstrapResult) {
				onCompleteFn()
			},
		})
		return
	}

	onCompleteFn()
}

func (d *db) Namespace(id ident.ID) (Namespace, bool) {
	d.RLock()
	defer d.RUnlock()
	return d.namespaces.Get(id)
}

func (d *db) Namespaces() []Namespace {
	d.RLock()
	defer d.RUnlock()
	namespaces := make([]Namespace, 0, d.namespaces.Len())
	for _, elem := range d.namespaces.Iter() {
		namespaces = append(namespaces, elem.Value())
	}
	return namespaces
}

func (d *db) Open() error {
	d.Lock()
	defer d.Unlock()
	// check if db has already been opened
	if d.state != databaseNotOpen {
		return errDatabaseAlreadyOpen
	}
	d.state = databaseOpen

	// start namespace watch
	if err := d.nsWatch.Start(); err != nil {
		return err
	}

	// Start the wired list
	if wiredList := d.opts.DatabaseBlockOptions().WiredList(); wiredList != nil {
		err := wiredList.Start()
		if err != nil {
			return err
		}
	}

	return d.mediator.Open()
}

func (d *db) terminateWithLock() error {
	// ensure database is open
	if d.state == databaseNotOpen {
		return errDatabaseNotOpen
	}
	if d.state == databaseClosed {
		return errDatabaseAlreadyClosed
	}
	d.state = databaseClosed

	// close the mediator
	if err := d.mediator.Close(); err != nil {
		return err
	}

	// stop listening for namespace changes
	if err := d.nsWatch.Close(); err != nil {
		return err
	}

	// Stop the wired list
	if wiredList := d.opts.DatabaseBlockOptions().WiredList(); wiredList != nil {
		err := wiredList.Stop()
		if err != nil {
			return err
		}
	}

	// NB(prateek): Terminate is meant to return quickly, so we rely upon
	// the gc to clean up any resources held by namespaces, and just drop
	// our references to them by reallocating the namespaces map.
	d.namespaces.Reallocate()

	// Finally close the commit log
	return d.commitLog.Close()
}

func (d *db) Terminate() error {
	// NB(bodu): Disable file ops waits for current fs processes to
	// finish before disabling.
	d.mediator.DisableFileOpsAndWait()

	d.Lock()
	defer d.Unlock()

	return d.terminateWithLock()
}

func (d *db) Close() error {
	// NB(bodu): Disable file ops waits for current fs processes to
	// finish before disabling.
	d.mediator.DisableFileOpsAndWait()

	d.Lock()
	defer d.Unlock()

	// get a reference to all owned namespaces
	namespaces := d.ownedNamespacesWithLock()

	// release any database level resources
	if err := d.terminateWithLock(); err != nil {
		return err
	}

	var multiErr xerrors.MultiError
	for _, ns := range namespaces {
		multiErr = multiErr.Add(ns.Close())
	}

	return multiErr.FinalError()
}

func (d *db) Write(
	ctx context.Context,
	namespace ident.ID,
	id ident.ID,
	timestamp xtime.UnixNano,
	value float64,
	unit xtime.Unit,
	annotation []byte,
) error {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		d.metrics.unknownNamespaceWrite.Inc(1)
		return err
	}

	seriesWrite, err := n.Write(ctx, id, timestamp, value, unit, annotation)
	if err != nil {
		return err
	}

	if !n.Options().WritesToCommitLog() || !seriesWrite.WasWritten {
		return nil
	}

	dp := ts.Datapoint{
		TimestampNanos: timestamp,
		Value:          value,
	}

	return d.commitLog.Write(ctx, seriesWrite.Series, dp, unit, annotation)
}
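
// A hedged sketch of a single untagged write from caller code (not part of
// this file; ctx, nsID, and seriesID are assumed to exist, and
// xtime.ToUnixNano is assumed available to convert a time.Time to the
// expected timestamp type):
//
//	err := db.Write(ctx, nsID, seriesID,
//		xtime.ToUnixNano(time.Now()), 42.0, xtime.Second, nil)
//	if err != nil {
//		// Either an unknown namespace or a failed series/commit log write.
//	}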

func (d *db) WriteTagged(
	ctx context.Context,
	namespace ident.ID,
	id ident.ID,
	tagResolver convert.TagMetadataResolver,
	timestamp xtime.UnixNano,
	value float64,
	unit xtime.Unit,
	annotation []byte,
) error {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		d.metrics.unknownNamespaceWriteTagged.Inc(1)
		return err
	}

	seriesWrite, err := n.WriteTagged(ctx, id, tagResolver, timestamp, value, unit, annotation)
	if err != nil {
		return err
	}

	if !n.Options().WritesToCommitLog() || !seriesWrite.WasWritten {
		return nil
	}

	dp := ts.Datapoint{
		TimestampNanos: timestamp,
		Value:          value,
	}

	return d.commitLog.Write(ctx, seriesWrite.Series, dp, unit, annotation)
}

func (d *db) BatchWriter(namespace ident.ID, batchSize int) (writes.BatchWriter, error) {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		d.metrics.unknownNamespaceBatchWriter.Inc(1)
		return nil, err
	}

	var (
		nsID        = n.ID()
		batchWriter = d.writeBatchPool.Get()
	)
	batchWriter.Reset(batchSize, nsID)
	return batchWriter, nil
}
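
// A minimal sketch of the intended batch-write flow from caller code
// (illustrative; assumes the Add signature on writes.BatchWriter from
// src/dbnode/ts/writes and an IndexedErrorHandler implementation named
// errHandler):
//
//	writer, err := db.BatchWriter(nsID, 128)
//	if err != nil {
//		return err
//	}
//	// Add each write at its original caller index so errors surfaced via
//	// errHandler can be correlated back to the write that caused them.
//	if err := writer.Add(0, seriesID, xtime.ToUnixNano(time.Now()),
//		42.0, xtime.Second, nil); err != nil {
//		return err
//	}
//	return db.WriteBatch(ctx, nsID, writer, errHandler)
//
// The batch is finalized by the write path (directly, or by the async commit
// log), so the writer must not be reused after WriteBatch returns.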

func (d *db) WriteBatch(
	ctx context.Context,
	namespace ident.ID,
	writer writes.BatchWriter,
	errHandler IndexedErrorHandler,
) error {
	return d.writeBatch(ctx, namespace, writer, errHandler, false)
}

func (d *db) WriteTaggedBatch(
	ctx context.Context,
	namespace ident.ID,
	writer writes.BatchWriter,
	errHandler IndexedErrorHandler,
) error {
	return d.writeBatch(ctx, namespace, writer, errHandler, true)
}

func (d *db) writeBatch(
	ctx context.Context,
	namespace ident.ID,
	writer writes.BatchWriter,
	errHandler IndexedErrorHandler,
	tagged bool,
) error {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		if tagged {
			d.metrics.unknownNamespaceWriteTaggedBatch.Inc(1)
		} else {
			d.metrics.unknownNamespaceWriteBatch.Inc(1)
		}
		return err
	}

	ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBWriteBatch)
	if sampled {
		sp.LogFields(
			opentracinglog.String("namespace", namespace.String()),
			opentracinglog.Bool("tagged", tagged),
		)
	}

	defer sp.Finish()
	writes, ok := writer.(writes.WriteBatch)
	if !ok {
		return errWriterDoesNotImplementWriteBatch
	}

	iter := writes.Iter()
	for i, write := range iter {
		var (
			seriesWrite SeriesWrite
			err         error
		)

		if tagged {
			seriesWrite, err = n.WriteTagged(
				ctx,
				write.Write.Series.ID,
				convert.NewEncodedTagsMetadataResolver(write.EncodedTags),
				write.Write.Datapoint.TimestampNanos,
				write.Write.Datapoint.Value,
				write.Write.Unit,
				write.Write.Annotation,
			)
		} else {
			seriesWrite, err = n.Write(
				ctx,
				write.Write.Series.ID,
				write.Write.Datapoint.TimestampNanos,
				write.Write.Datapoint.Value,
				write.Write.Unit,
				write.Write.Annotation,
			)
		}
		if err != nil {
			// Return errors with the original index provided by the caller so they
			// can associate the error with the write that caused it.
			errHandler.HandleError(write.OriginalIndex, err)
			writes.SetError(i, err)
			continue
		}

		// Need to set the outcome in the success case so the commitlog gets the
		// updated series object, which contains identifiers (like the series ID)
		// whose lifecycles outlast the span of this request, making them safe
		// for use by the async commitlog. The outcome must also be set in the
		// error case so that the commitlog knows to skip the entry.
		writes.SetSeries(i, seriesWrite.Series)

		if !seriesWrite.WasWritten {
			// This series has no additional information that needs to be written to
			// the commit log; set this series to skip writing to the commit log.
			writes.SetSkipWrite(i)
		}

		if seriesWrite.NeedsIndex {
			writes.SetPendingIndex(i, seriesWrite.PendingIndexInsert)
		}
	}

	// Now insert all pending index inserts together in one go
	// to limit lock contention.
	if pending := writes.PendingIndex(); len(pending) > 0 {
		err := n.WritePendingIndexInserts(pending)
		if err != nil {
			// Mark the writes that were pending index with an error.
			// Note: this is an invariant error; queueing should never fail,
			// so it is fine to fail all of these entries if we cannot
			// write the pending index inserts.
			for i, write := range iter {
				if write.PendingIndex {
					errHandler.HandleError(write.OriginalIndex, err)
					writes.SetError(i, err)
				}
			}
		}
	}

	if !n.Options().WritesToCommitLog() {
		// Finalize here because we can't rely on the commitlog to do it since
		// we're not using it.
		writes.Finalize()
		return nil
	}

	return d.commitLog.WriteBatch(ctx, writes)
}

func (d *db) QueryIDs(
	ctx context.Context,
	namespace ident.ID,
	query index.Query,
	opts index.QueryOptions,
) (index.QueryResult, error) {
	ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBQueryIDs)
	if sampled {
		sp.LogFields(
			opentracinglog.String("query", query.String()),
			opentracinglog.String("namespace", namespace.String()),
			opentracinglog.Int("seriesLimit", opts.SeriesLimit),
			opentracinglog.Int("docsLimit", opts.DocsLimit),
			xopentracing.Time("start", opts.StartInclusive.ToTime()),
			xopentracing.Time("end", opts.EndExclusive.ToTime()),
		)
	}
	defer sp.Finish()

	// Check whether we are exceeding query limits at the very beginning of
	// the query path so we can abandon the query as early as possible.
	if err := d.queryLimits.AnyFetchExceeded(); err != nil {
		return index.QueryResult{}, err
	}

	n, err := d.namespaceFor(namespace)
	if err != nil {
		sp.LogFields(opentracinglog.Error(err))
		d.metrics.unknownNamespaceQueryIDs.Inc(1)
		return index.QueryResult{}, err
	}

	return n.QueryIDs(ctx, query, opts)
}

func (d *db) AggregateQuery(
	ctx context.Context,
	namespace ident.ID,
	query index.Query,
	aggResultOpts index.AggregationOptions,
) (index.AggregateQueryResult, error) {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		d.metrics.unknownNamespaceQueryIDs.Inc(1)
		return index.AggregateQueryResult{}, err
	}

	ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBAggregateQuery)
	if sampled {
		sp.LogFields(
			opentracinglog.String("query", query.String()),
			opentracinglog.String("namespace", namespace.String()),
			opentracinglog.Int("seriesLimit", aggResultOpts.QueryOptions.SeriesLimit),
			opentracinglog.Int("docsLimit", aggResultOpts.QueryOptions.DocsLimit),
			xopentracing.Time("start", aggResultOpts.QueryOptions.StartInclusive.ToTime()),
			xopentracing.Time("end", aggResultOpts.QueryOptions.EndExclusive.ToTime()),
		)
	}

	defer sp.Finish()
	return n.AggregateQuery(ctx, query, aggResultOpts)
}

func (d *db) ReadEncoded(
	ctx context.Context,
	namespace ident.ID,
	id ident.ID,
	start, end xtime.UnixNano,
) (series.BlockReaderIter, error) {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		d.metrics.unknownNamespaceRead.Inc(1)
		return nil, err
	}

	return n.ReadEncoded(ctx, id, start, end)
}

func (d *db) FetchBlocks(
	ctx context.Context,
	namespace ident.ID,
	shardID uint32,
	id ident.ID,
	starts []xtime.UnixNano,
) ([]block.FetchBlockResult, error) {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		d.metrics.unknownNamespaceFetchBlocks.Inc(1)
		return nil, xerrors.NewInvalidParamsError(err)
	}

	ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBFetchBlocks)
	if sampled {
		sp.LogFields(
			opentracinglog.String("namespace", namespace.String()),
			opentracinglog.Uint32("shardID", shardID),
			opentracinglog.String("id", id.String()),
		)
	}

	defer sp.Finish()
	return n.FetchBlocks(ctx, shardID, id, starts)
}

func (d *db) FetchBlocksMetadataV2(
	ctx context.Context,
	namespace ident.ID,
	shardID uint32,
	start, end xtime.UnixNano,
	limit int64,
	pageToken PageToken,
	opts block.FetchBlocksMetadataOptions,
) (block.FetchBlocksMetadataResults, PageToken, error) {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		d.metrics.unknownNamespaceFetchBlocksMetadata.Inc(1)
		return nil, nil, xerrors.NewInvalidParamsError(err)
	}

	ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBFetchBlocksMetadataV2)
	if sampled {
		sp.LogFields(
			opentracinglog.String("namespace", namespace.String()),
			opentracinglog.Uint32("shardID", shardID),
			xopentracing.Time("start", start.ToTime()),
			xopentracing.Time("end", end.ToTime()),
			opentracinglog.Int64("limit", limit),
		)
	}

	defer sp.Finish()
	return n.FetchBlocksMetadataV2(ctx, shardID, start, end, limit,
		pageToken, opts)
}

func (d *db) Bootstrap() error {
	d.Lock()
	d.bootstraps++
	d.Unlock()

	// NB: We need to acquire bootstrapMutex to protect from receiving new shardSets or namespaces during
	// bootstrapping.
	d.bootstrapMutex.Lock()
	_, err := d.mediator.Bootstrap()
	d.bootstrapMutex.Unlock()
	return err
}

func (d *db) IsBootstrapped() bool {
	return d.mediator.IsBootstrapped()
}

// IsBootstrappedAndDurable should only return true if the following conditions are met:
//    1. The database is bootstrapped.
//    2. The last successful snapshot began AFTER the last bootstrap completed.
//
// Those two conditions should be sufficient to ensure that after a placement change the
// node will be able to bootstrap any and all data from its local disk, however, for posterity
// we also perform the following check:
//    3. The last bootstrap completed AFTER the shardset was last assigned.
func (d *db) IsBootstrappedAndDurable() bool {
	isBootstrapped := d.mediator.IsBootstrapped()
	if !isBootstrapped {
		d.log.Debug("not bootstrapped and durable because: not bootstrapped")
		return false
	}

	lastBootstrapCompletionTimeNano, ok := d.mediator.LastBootstrapCompletionTime()
	if !ok {
		d.log.Debug("not bootstrapped and durable because: no last bootstrap completion time",
			zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTimeNano.ToTime()))

		return false
	}

	lastSnapshotStartTime, ok := d.mediator.LastSuccessfulSnapshotStartTime()
	if !ok {
		d.log.Debug("not bootstrapped and durable because: no last snapshot start time",
			zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTimeNano.ToTime()),
			zap.Time("lastSnapshotStartTime", lastSnapshotStartTime.ToTime()),
		)
		return false
	}

	var (
		lastBootstrapCompletionTime            = lastBootstrapCompletionTimeNano.ToTime()
		hasSnapshottedPostBootstrap            = lastSnapshotStartTime.After(lastBootstrapCompletionTimeNano)
		hasBootstrappedSinceReceivingNewShards = lastBootstrapCompletionTime.After(d.lastReceivedNewShards) ||
			lastBootstrapCompletionTime.Equal(d.lastReceivedNewShards)
		isBootstrappedAndDurable = hasSnapshottedPostBootstrap &&
			hasBootstrappedSinceReceivingNewShards
	)

	if !isBootstrappedAndDurable {
		d.log.Debug(
			"not bootstrapped and durable because: has not snapshotted post bootstrap and/or has not bootstrapped since receiving new shards",
			zap.Time("lastBootstrapCompletionTime", lastBootstrapCompletionTime),
			zap.Time("lastSnapshotStartTime", lastSnapshotStartTime.ToTime()),
			zap.Time("lastReceivedNewShards", d.lastReceivedNewShards),
		)
		return false
	}

	return true
}

func (d *db) Repair() error {
	return d.repairer.Repair()
}

func (d *db) Truncate(namespace ident.ID) (int64, error) {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		return 0, err
	}
	return n.Truncate()
}

func (d *db) IsOverloaded() bool {
	queueSize := float64(d.commitLog.QueueLength())
	queueCapacity := float64(d.opts.CommitLogOptions().BacklogQueueSize())
	return queueSize >= commitLogQueueCapacityOverloadedFactor*queueCapacity
}
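
// For example, with a configured commit log BacklogQueueSize of 2048 (an
// arbitrary illustrative value, not necessarily the default), IsOverloaded
// reports true once the commit log queue length reaches 0.9 * 2048, i.e.
// roughly 1843 pending writes.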

func (d *db) BootstrapState() DatabaseBootstrapState {
	nsBootstrapStates := NamespaceBootstrapStates{}

	d.RLock()
	for _, n := range d.namespaces.Iter() {
		ns := n.Value()
		nsBootstrapStates[ns.ID().String()] = ns.ShardBootstrapState()
	}
	d.RUnlock()

	return DatabaseBootstrapState{
		NamespaceBootstrapStates: nsBootstrapStates,
	}
}

func (d *db) FlushState(
	namespace ident.ID,
	shardID uint32,
	blockStart xtime.UnixNano,
) (fileOpState, error) {
	n, err := d.namespaceFor(namespace)
	if err != nil {
		return fileOpState{}, err
	}
	return n.FlushState(shardID, blockStart)
}

func (d *db) namespaceFor(namespace ident.ID) (databaseNamespace, error) {
	d.RLock()
	n, exists := d.namespaces.Get(namespace)
	d.RUnlock()

	if !exists {
		return nil, dberrors.NewUnknownNamespaceError(namespace.String())
	}
	return n, nil
}

func (d *db) ownedNamespacesWithLock() []databaseNamespace {
	namespaces := make([]databaseNamespace, 0, d.namespaces.Len())
	for _, n := range d.namespaces.Iter() {
		namespaces = append(namespaces, n.Value())
	}
	return namespaces
}

func (d *db) OwnedNamespaces() ([]databaseNamespace, error) {
	d.RLock()
	defer d.RUnlock()
	if d.state == databaseClosed {
		return nil, errDatabaseIsClosed
	}
	return d.ownedNamespacesWithLock(), nil
}

func (d *db) AggregateTiles(
	ctx context.Context,
	sourceNsID,
	targetNsID ident.ID,
	opts AggregateTilesOptions,
) (int64, error) {
	jobInProgress := opts.InsOptions.MetricsScope().Gauge("aggregations-in-progress")
	// Use the values returned by AddInt32 so the gauge updates do not read
	// the shared counter non-atomically while concurrent aggregations are
	// mutating it.
	jobInProgress.Update(float64(atomic.AddInt32(&aggregationsInProgress, 1)))
	defer func() {
		jobInProgress.Update(float64(atomic.AddInt32(&aggregationsInProgress, -1)))
	}()

	ctx, sp, sampled := ctx.StartSampledTraceSpan(tracepoint.DBAggregateTiles)
	if sampled {
		sp.LogFields(
			opentracinglog.String("sourceNamespace", sourceNsID.String()),
			opentracinglog.String("targetNamespace", targetNsID.String()),
			xopentracing.Time("start", opts.Start.ToTime()),
			xopentracing.Time("end", opts.End.ToTime()),
			xopentracing.Duration("step", opts.Step),
		)
	}
	defer sp.Finish()

	sourceNs, err := d.namespaceFor(sourceNsID)
	if err != nil {
		d.metrics.unknownNamespaceRead.Inc(1)
		return 0, err
	}

	targetNs, err := d.namespaceFor(targetNsID)
	if err != nil {
		d.metrics.unknownNamespaceRead.Inc(1)
		return 0, err
	}

	processedTileCount, err := targetNs.AggregateTiles(ctx, sourceNs, opts)
	if err != nil {
		d.log.Error("error writing large tiles",
			zap.String("sourceNs", sourceNsID.String()),
			zap.String("targetNs", targetNsID.String()),
			zap.Error(err),
		)
		reportAggregateTilesErrors(opts.InsOptions.MetricsScope(), err)
	}

	return processedTileCount, err
}

func (d *db) nextIndex() uint64 {
	// Start with index at "1" so that a default "uniqueIndex"
	// with "0" is invalid (AddUint64 will return the new value).
	return atomic.AddUint64(&d.created, 1)
}
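
// For example, the first call to nextIndex returns 1, so a series that still
// carries the zero value for its unique index can be recognized as never
// having been assigned one.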

type tsIDs []ident.ID

func (t tsIDs) String() (string, error) {
	var buf bytes.Buffer
	buf.WriteRune('[')
	for idx, id := range t {
		if idx != 0 {
			if _, err := buf.WriteString(", "); err != nil {
				return "", err
			}
		}
		if _, err := buf.WriteString(id.String()); err != nil {
			return "", err
		}
	}
	buf.WriteRune(']')
	return buf.String(), nil
}

type metadatas []namespace.Metadata

func (m metadatas) String() (string, error) {
	var buf bytes.Buffer
	buf.WriteRune('[')
	for idx, md := range m {
		if idx != 0 {
			if _, err := buf.WriteString(", "); err != nil {
				return "", err
			}
		}
		if _, err := buf.WriteString(md.ID().String()); err != nil {
			return "", err
		}
	}
	buf.WriteRune(']')
	return buf.String(), nil
}

// NewAggregateTilesOptions creates new AggregateTilesOptions.
func NewAggregateTilesOptions(
	start, end xtime.UnixNano,
	step time.Duration,
	targetNsID ident.ID,
	process AggregateTilesProcess,
	memorizeMetricTypes, backfillMetricTypes bool,
	metricTypeByName map[string]annotation.Payload,
	insOpts instrument.Options,
) (AggregateTilesOptions, error) {
	if !end.After(start) {
		return AggregateTilesOptions{}, fmt.Errorf("AggregateTilesOptions.End must be after Start, got %s - %s", start, end)
	}

	if step <= 0 {
		return AggregateTilesOptions{}, fmt.Errorf("AggregateTilesOptions.Step must be positive, got %s", step)
	}

	if (memorizeMetricTypes || backfillMetricTypes) && metricTypeByName == nil {
		return AggregateTilesOptions{}, errors.New(
			"metricTypeByName must not be nil when memorizeMetricTypes or backfillMetricTypes is true")
	}

	scope := insOpts.MetricsScope().SubScope("computed-namespace")
	insOpts = insOpts.SetMetricsScope(scope.Tagged(map[string]string{
		"target-namespace": targetNsID.String(),
		"process":          process.String(),
	}))

	return AggregateTilesOptions{
		Start:   start,
		End:     end,
		Step:    step,
		Process: process,

		MemorizeMetricTypes: memorizeMetricTypes,
		BackfillMetricTypes: backfillMetricTypes,
		MetricTypeByName:    metricTypeByName,

		InsOptions: insOpts,
	}, nil
}
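
// A hedged construction sketch (illustrative caller code; targetNsID and the
// process value, one of the AggregateTilesProcess values defined elsewhere in
// this package, are assumed to be supplied by the caller, and xtime.ToUnixNano
// is assumed available for the timestamp conversion):
//
//	now := xtime.ToUnixNano(time.Now())
//	opts, err := NewAggregateTilesOptions(
//		now.Add(-time.Hour), now, 5*time.Minute,
//		targetNsID, process,
//		false, false, nil,
//		instrument.NewOptions())
//	if err != nil {
//		// The start/end ordering, a non-positive step, or a missing
//		// metricTypeByName failed validation.
//	}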

func reportAggregateTilesErrors(scope tally.Scope, err error) {
	errorType := "not-categorized"
	if xerrors.Is(err, client.ErrSessionStatusNotOpen) {
		errorType = "connection-to-peer"
	}
	scope.Tagged(map[string]string{"error-type": errorType}).Counter("aggregate-tiles-failed").Inc(1)
}