github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/entry.go

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package storage
    22  
    23  import (
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/m3db/m3/src/dbnode/storage/block"
    29  	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
    30  	"github.com/m3db/m3/src/dbnode/storage/index"
    31  	"github.com/m3db/m3/src/dbnode/storage/series"
    32  	"github.com/m3db/m3/src/dbnode/ts/writes"
    33  	"github.com/m3db/m3/src/m3ninx/doc"
    34  	"github.com/m3db/m3/src/x/clock"
    35  	"github.com/m3db/m3/src/x/context"
    36  	m3errors "github.com/m3db/m3/src/x/errors"
    37  	"github.com/m3db/m3/src/x/ident"
    38  	"github.com/m3db/m3/src/x/resource"
    39  	xtime "github.com/m3db/m3/src/x/time"
    40  
    41  	"github.com/uber-go/tally"
    42  	xatomic "go.uber.org/atomic"
    43  )
    44  
    45  // IndexWriter accepts index inserts.
    46  type IndexWriter interface {
    47  	// WritePending indexes the provided pending entries.
    48  	WritePending(
    49  		pending []writes.PendingIndexInsert,
    50  	) error
    51  
    52  	// BlockStartForWriteTime returns the index block start
    53  	// time for the given writeTime.
    54  	BlockStartForWriteTime(
    55  		writeTime xtime.UnixNano,
    56  	) xtime.UnixNano
    57  }
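
// Illustrative sketch: a minimal IndexWriter stub, e.g. for tests that
// exercise Entry without a real index. The type name noopIndexWriter and its
// fixed block size are hypothetical; in production the writer is supplied via
// NewEntryOptions.IndexWriter.
type noopIndexWriter struct {
	blockSize time.Duration
}

func (w noopIndexWriter) WritePending(pending []writes.PendingIndexInsert) error {
	// Drop the pending inserts; a real writer enqueues them for indexing.
	return nil
}

func (w noopIndexWriter) BlockStartForWriteTime(writeTime xtime.UnixNano) xtime.UnixNano {
	// Align the write time to the start of its index block.
	return writeTime.Truncate(w.blockSize)
}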
    58  
    59  // EntryMetrics are metrics for an entry.
    60  type EntryMetrics struct {
    61  	gcNoReconcile           tally.Counter
    62  	gcNeedsReconcile        tally.Counter
    63  	gcSuccessShardClosed    tally.Counter
    64  	gcSuccessEmpty          tally.Counter
    65  	noGcNil                 tally.Counter
    66  	noGcErr                 tally.Counter
    67  	noGcHasReaders          tally.Counter
    68  	noGcNotEmptySeries      tally.Counter
    69  	duplicateNoReconcile    tally.Counter
    70  	duplicateNeedsReconcile tally.Counter
    71  }
    72  
    73  // NewEntryMetrics builds a new EntryMetrics.
    74  func NewEntryMetrics(scope tally.Scope) *EntryMetrics {
    75  	return &EntryMetrics{
    76  		gcNoReconcile: scope.Tagged(map[string]string{
    77  			"reconcile": "no_reconcile",
    78  			"path":      "gc",
    79  		}).Counter("count"),
    80  		gcNeedsReconcile: scope.Tagged(map[string]string{
    81  			"reconcile": "needs_reconcile",
    82  			"path":      "gc",
    83  		}).Counter("count"),
    84  		gcSuccessShardClosed: scope.Tagged(map[string]string{
    85  			"reason": "shard_closed",
    86  			"path":   "gc",
    87  		}).Counter("gc_count"),
    88  		gcSuccessEmpty: scope.Tagged(map[string]string{
    89  			"reason": "empty",
    90  			"path":   "gc",
    91  		}).Counter("gc_count"),
    92  		noGcNil: scope.Tagged(map[string]string{
    93  			"reason": "nil",
    94  			"path":   "gc",
    95  		}).Counter("no_gc_count"),
    96  		noGcErr: scope.Tagged(map[string]string{
    97  			"reason": "error",
    98  			"path":   "gc",
    99  		}).Counter("no_gc_count"),
   100  		noGcHasReaders: scope.Tagged(map[string]string{
   101  			"reason": "has_readers",
   102  			"path":   "gc",
   103  		}).Counter("no_gc_count"),
   104  		noGcNotEmptySeries: scope.Tagged(map[string]string{
   105  			"reason": "not_empty_series",
   106  			"path":   "gc",
   107  		}).Counter("no_gc_count"),
   108  
   109  		duplicateNoReconcile: scope.Tagged(map[string]string{
   110  			"reconcile": "no_reconcile",
   111  			"path":      "duplicate",
   112  		}).Counter("count"),
   113  		duplicateNeedsReconcile: scope.Tagged(map[string]string{
   114  			"reconcile": "needs_reconcile",
   115  			"path":      "duplicate",
   116  		}).Counter("count"),
   117  	}
   118  }
   119  
   120  // Entry is the entry in the shard ident.ID -> series map. It has additional
   121  // members to track lifecycle and minimize indexing overhead.
   122  // NB: users are expected to use `NewEntry` to construct these objects.
   123  type Entry struct {
   124  	ID                       ident.ID
   125  	Shard                    Shard
   126  	Series                   series.DatabaseSeries
   127  	Index                    uint64
   128  	IndexGarbageCollected    *xatomic.Bool
   129  	insertTime               *xatomic.Int64
   130  	indexWriter              IndexWriter
   131  	curReadWriters           int32
   132  	reverseIndex             entryIndexState
   133  	nowFn                    clock.NowFn
   134  	metrics                  *EntryMetrics
   135  	pendingIndexBatchSizeOne []writes.PendingIndexInsert
   136  }
   137  
   138  // ensure Entry satisfies the `doc.OnIndexSeries` interface.
   139  var _ doc.OnIndexSeries = &Entry{}
   140  
   141  // ensure Entry satisfies the `bootstrap.SeriesRef` interface.
   142  var _ bootstrap.SeriesRef = &Entry{}
   143  
   144  // ensure Entry satisfies the `bootstrap.SeriesRefResolver` interface.
   145  var _ bootstrap.SeriesRefResolver = &Entry{}
   146  
   147  // NewEntryOptions supplies options for a new entry.
   148  type NewEntryOptions struct {
   149  	Shard        Shard
   150  	Series       series.DatabaseSeries
   151  	Index        uint64
   152  	IndexWriter  IndexWriter
   153  	NowFn        clock.NowFn
   154  	EntryMetrics *EntryMetrics
   155  }
   156  
   157  // NewEntry returns a new Entry.
   158  func NewEntry(opts NewEntryOptions) *Entry {
   159  	nowFn := time.Now
   160  	if opts.NowFn != nil {
   161  		nowFn = opts.NowFn
   162  	}
   163  	entry := &Entry{
   164  		ID:                       opts.Series.ID(),
   165  		Shard:                    opts.Shard,
   166  		Series:                   opts.Series,
   167  		Index:                    opts.Index,
   168  		IndexGarbageCollected:    xatomic.NewBool(false),
   169  		insertTime:               xatomic.NewInt64(0),
   170  		indexWriter:              opts.IndexWriter,
   171  		nowFn:                    nowFn,
   172  		pendingIndexBatchSizeOne: make([]writes.PendingIndexInsert, 1),
   173  		reverseIndex:             newEntryIndexState(),
   174  		metrics:                  opts.EntryMetrics,
   175  	}
   176  	return entry
   177  }
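
// Illustrative sketch: wiring an Entry from its dependencies. In practice the
// shard's insert path does this; the helper name newEntryForSeries is
// hypothetical.
func newEntryForSeries(
	shard Shard,
	dbSeries series.DatabaseSeries,
	indexWriter IndexWriter,
	metrics *EntryMetrics,
) *Entry {
	return NewEntry(NewEntryOptions{
		Shard:        shard,
		Series:       dbSeries,
		IndexWriter:  indexWriter,
		NowFn:        time.Now,
		EntryMetrics: metrics,
	})
}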
   178  
   179  // StringID returns the index series ID, as a string.
   180  func (entry *Entry) StringID() string {
   181  	return entry.ID.String()
   182  }
   183  
   184  // ReaderWriterCount returns the current ref count on the Entry.
   185  func (entry *Entry) ReaderWriterCount() int32 {
   186  	return atomic.LoadInt32(&entry.curReadWriters)
   187  }
   188  
   189  // IncrementReaderWriterCount increments the ref count on the Entry.
   190  func (entry *Entry) IncrementReaderWriterCount() {
   191  	atomic.AddInt32(&entry.curReadWriters, 1)
   192  }
   193  
   194  // DecrementReaderWriterCount decrements the ref count on the Entry.
   195  func (entry *Entry) DecrementReaderWriterCount() {
   196  	atomic.AddInt32(&entry.curReadWriters, -1)
   197  }
   198  
   199  // IndexedBlockCount returns the count of indexed block states.
   200  func (entry *Entry) IndexedBlockCount() int {
   201  	entry.reverseIndex.RLock()
   202  	count := len(entry.reverseIndex.states)
   203  	entry.reverseIndex.RUnlock()
   204  	return count
   205  }
   206  
   207  // IndexedForBlockStart returns a bool to indicate if the Entry has been successfully
   208  // indexed for the given index blockStart.
   209  func (entry *Entry) IndexedForBlockStart(indexBlockStart xtime.UnixNano) bool {
   210  	entry.reverseIndex.RLock()
   211  	isIndexed := entry.reverseIndex.indexedWithRLock(indexBlockStart)
   212  	entry.reverseIndex.RUnlock()
   213  	return isIndexed
   214  }
   215  
   216  // IndexedRange returns the minimum and maximum blockStart values covered by the index entry.
   217  // The range is inclusive. Note that there may be uncovered gaps within the range.
   218  // Returns (0, 0) for an empty range.
   219  func (entry *Entry) IndexedRange() (xtime.UnixNano, xtime.UnixNano) {
   220  	entry.reverseIndex.RLock()
   221  	min, max := entry.reverseIndex.indexedRangeWithRLock()
   222  	entry.reverseIndex.RUnlock()
   223  	return min, max
   224  }
   225  
   226  // ReconciledOnIndexSeries attempts to retrieve the most recent index entry from the
   227  // shard if the entry this method was called on was never inserted there. If there
   228  // is an error during retrieval, simply returns the current entry. Additionally,
   229  // returns a cleanup function to run once finished using the reconciled entry and
   230  // a boolean value indicating whether the result came from reconciliation or not.
   231  func (entry *Entry) ReconciledOnIndexSeries() (doc.OnIndexSeries, resource.SimpleCloser, bool) {
   232  	if entry.insertTime.Load() > 0 {
   233  		return entry, resource.SimpleCloserFn(func() {}), false
   234  	}
   235  
   236  	e, _, err := entry.Shard.TryRetrieveSeriesAndIncrementReaderWriterCount(entry.ID)
   237  	if err != nil || e == nil {
   238  		return entry, resource.SimpleCloserFn(func() {}), false
   239  	}
   240  
   241  	// NB: attempt to merge the index series here, to ensure the returned
   242  	// reconciled series will have each index block marked from both this and the
   243  	// reconciled series.
   244  	entry.mergeInto(e)
   245  
   246  	return e, resource.SimpleCloserFn(func() {
   247  		e.DecrementReaderWriterCount()
   248  	}), true
   249  }
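
// Illustrative sketch of the intended calling pattern: operate on the
// reconciled view, then run the returned closer exactly once so that the
// reader/writer ref taken on a reconciled entry is released. The helper name
// withReconciled is hypothetical.
func withReconciled(entry *Entry, fn func(doc.OnIndexSeries)) bool {
	reconciled, closer, wasReconciled := entry.ReconciledOnIndexSeries()
	defer closer.Close()
	fn(reconciled)
	return wasReconciled
}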
   250  
   251  // MergeEntryIndexBlockStates merges the given states into the current
   252  // indexed entry.
   253  func (entry *Entry) MergeEntryIndexBlockStates(states doc.EntryIndexBlockStates) {
   254  	entry.reverseIndex.Lock()
   255  	for t, state := range states {
   256  		set := false
   257  		if state.Success {
   258  			set = true
   259  			entry.reverseIndex.setSuccessWithWLock(t)
   260  		} else {
   261  			// NB: setSuccessWithWLock(t) performs the logic to determine whether
   262  			// minIndexedT/maxIndexedT need to be updated; since it is not called on
   263  			// this path, update them here.
   264  			if entry.reverseIndex.maxIndexedT < t {
   265  				entry.reverseIndex.maxIndexedT = t
   266  			}
   267  			if entry.reverseIndex.minIndexedT > t {
   268  				entry.reverseIndex.minIndexedT = t
   269  			}
   270  		}
   271  
   272  		if state.Attempt {
   273  			set = true
   274  			entry.reverseIndex.setAttemptWithWLock(t, false)
   275  		}
   276  
   277  		if !set {
   278  			// NB: if not set through the above methods, need to create an index block
   279  			// state at the given timestamp.
   280  			entry.reverseIndex.states[t] = doc.EntryIndexBlockState{}
   281  		}
   282  	}
   283  
   284  	entry.reverseIndex.Unlock()
   285  }
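
// Illustrative sketch showing the shape of doc.EntryIndexBlockStates as
// consumed above: a map keyed by index block start holding per-block
// Success/Attempt flags. The block start arguments are hypothetical.
func mergeExampleStates(entry *Entry, indexedBlock, attemptedBlock xtime.UnixNano) {
	entry.MergeEntryIndexBlockStates(doc.EntryIndexBlockStates{
		indexedBlock:   {Success: true},
		attemptedBlock: {Attempt: true},
	})
}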
   286  
   287  // NeedsIndexUpdate returns a bool to indicate if the Entry needs to be indexed
   288  // for the provided blockStart. It only allows a single index attempt at a time
   289  // for a single entry.
   290  // NB(prateek): NeedsIndexUpdate is a CAS, i.e. when this method returns true, it
   291  // also sets state on the entry to indicate that a write for the given blockStart
   292  // is going to be sent to the index, and other goroutines should not attempt the
   293  // same write. Callers are expected to ensure they follow this guideline.
   294  // Further, every call to NeedsIndexUpdate which returns true needs to have a corresponding
   295  // OnIndexFinalize() call. This is required for correct lifecycle maintenance.
   296  func (entry *Entry) NeedsIndexUpdate(indexBlockStartForWrite xtime.UnixNano) bool {
   297  	// first we try the low-cost path: acquire a RLock and see if the given block start
   298  	// has been marked successful or that we've attempted it.
   299  	entry.reverseIndex.RLock()
   300  	alreadyIndexedOrAttempted := entry.reverseIndex.indexedOrAttemptedWithRLock(indexBlockStartForWrite)
   301  	entry.reverseIndex.RUnlock()
   302  	if alreadyIndexedOrAttempted {
   303  		// if so, the entry does not need to be indexed.
   304  		return false
   305  	}
   306  
   307  	// now acquire a write lock and set that we're going to attempt to do this so we don't try
   308  	// multiple times.
   309  	entry.reverseIndex.Lock()
   310  	// NB(prateek): not defer-ing here, need to avoid the extra ~150ns to minimize contention.
   311  
   312  	// but first, we have to ensure no one has done so since we released the read lock
   313  	alreadyIndexedOrAttempted = entry.reverseIndex.indexedOrAttemptedWithRLock(indexBlockStartForWrite)
   314  	if alreadyIndexedOrAttempted {
   315  		entry.reverseIndex.Unlock()
   316  		return false
   317  	}
   318  
   319  	entry.reverseIndex.setAttemptWithWLock(indexBlockStartForWrite, true)
   320  	entry.reverseIndex.Unlock()
   321  	return true
   322  }
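
// Illustrative sketch of the contract documented above: every
// NeedsIndexUpdate that returns true must eventually be paired with an
// OnIndexFinalize for the same block start. The helper name and the enqueue
// callback are hypothetical; see maybeIndex below for the real write path.
func tryEnqueueIndexWrite(
	entry *Entry,
	blockStart xtime.UnixNano,
	enqueue func(*Entry) error,
) error {
	if !entry.NeedsIndexUpdate(blockStart) {
		// Already indexed, or an attempt is already in flight for this block.
		return nil
	}
	// Takes a ref that is held for the duration of the pending index write.
	entry.OnIndexPrepare(blockStart)
	if err := enqueue(entry); err != nil {
		// The write never reached the index, so release the attempt and ref here.
		entry.OnIndexFinalize(blockStart)
		return err
	}
	// On success the index calls OnIndexSuccess and OnIndexFinalize itself.
	return nil
}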
   323  
   324  // OnIndexPrepare prepares the Entry to be handed off to the indexing sub-system.
   325  // NB(prateek): we retain the ref count on the entry while the indexing is pending,
   326  // the callback executed on the entry once the indexing is completed releases this
   327  // reference.
   328  func (entry *Entry) OnIndexPrepare(blockStartNanos xtime.UnixNano) {
   329  	entry.reverseIndex.Lock()
   330  	entry.reverseIndex.setAttemptWithWLock(blockStartNanos, true)
   331  	entry.reverseIndex.Unlock()
   332  	entry.IncrementReaderWriterCount()
   333  }
   334  
   335  // OnIndexSuccess marks the given block start as successfully indexed.
   336  func (entry *Entry) OnIndexSuccess(blockStartNanos xtime.UnixNano) {
   337  	entry.reverseIndex.Lock()
   338  	entry.reverseIndex.setSuccessWithWLock(blockStartNanos)
   339  	entry.reverseIndex.Unlock()
   340  }
   341  
   342  // OnIndexFinalize marks any attempt for the given block start as finished
   343  // and decrements the entry ref count.
   344  func (entry *Entry) OnIndexFinalize(blockStartNanos xtime.UnixNano) {
   345  	entry.reverseIndex.Lock()
   346  	entry.reverseIndex.setAttemptWithWLock(blockStartNanos, false)
   347  	entry.reverseIndex.Unlock()
   348  	// indicate the index has released held reference for provided write
   349  	entry.DecrementReaderWriterCount()
   350  }
   351  
   352  // IfAlreadyIndexedMarkIndexSuccessAndFinalize marks the entry as successfully
   353  // indexed if already indexed and returns true. Otherwise returns false.
   354  func (entry *Entry) IfAlreadyIndexedMarkIndexSuccessAndFinalize(
   355  	blockStart xtime.UnixNano,
   356  ) bool {
   357  	successAlready := false
   358  	entry.reverseIndex.Lock()
   359  	for _, state := range entry.reverseIndex.states {
   360  		if state.Success {
   361  			successAlready = true
   362  			break
   363  		}
   364  	}
   365  	if successAlready {
   366  		entry.reverseIndex.setSuccessWithWLock(blockStart)
   367  		entry.reverseIndex.setAttemptWithWLock(blockStart, false)
   368  	}
   369  	entry.reverseIndex.Unlock()
   370  	if successAlready {
   371  		// indicate the index has released held reference for provided write
   372  		entry.DecrementReaderWriterCount()
   373  	}
   374  	return successAlready
   375  }
   376  
   377  // TryMarkIndexGarbageCollected checks if the entry is eligible to be garbage collected
   378  // from the index. If so, it marks the entry as GCed and returns true. Otherwise returns false.
   379  func (entry *Entry) TryMarkIndexGarbageCollected() bool {
   380  	// Since series insertions + index insertions are done separately and asynchronously, it is
   381  	// possible for a series to be in the index but not have data written yet, so we cannot yet
   382  	// consider empty any series that is not yet in the lookup.
   383  	e, _, err := entry.Shard.TryRetrieveSeriesAndIncrementReaderWriterCount(entry.ID)
   384  	if m3errors.Is(err, errShardNotOpen) {
   385  		// Shard is closing, all entries which belonged to it should be gc'ed.
   386  		entry.metrics.gcSuccessShardClosed.Inc(1)
   387  		entry.IndexGarbageCollected.Store(true)
   388  		return true
   389  	}
   390  
   391  	if err != nil {
   392  		entry.metrics.noGcErr.Inc(1)
   393  		return false
   394  	}
   395  
   396  	if e == nil {
   397  		entry.metrics.noGcNil.Inc(1)
   398  		return false
   399  	}
   400  
   401  	defer e.DecrementReaderWriterCount()
   402  
   403  	// Was reconciled if the entry retrieved from the shard differs from the current.
   404  	if e != entry {
   405  		// If this entry needs further reconciliation, merge this entry's index
   406  		// states into the reconciled entry retrieved from the shard.
   407  		entry.reverseIndex.RLock()
   408  		e.MergeEntryIndexBlockStates(entry.reverseIndex.states)
   409  		entry.reverseIndex.RUnlock()
   410  	}
   411  
   412  	// Consider non-empty if the entry is still being held since this could indicate
   413  	// another thread holding a new series prior to writing to it.
   414  	if e.ReaderWriterCount() > 1 {
   415  		entry.metrics.noGcHasReaders.Inc(1)
   416  		return false
   417  	}
   418  
   419  	// Series must be empty to be GCed. This happens when the data and index are flushed to disk and
   420  	// so the series no longer has in-mem data.
   421  	if !e.Series.IsEmpty() {
   422  		entry.metrics.noGcNotEmptySeries.Inc(1)
   423  		return false
   424  	}
   425  
   426  	// Mark as GCed from index so the entry can be safely cleaned up in the shard.
   427  	// The reference to this entry from the index is removed by the code path that
   428  	// marks this GCed bool.
   429  	e.metrics.gcSuccessEmpty.Inc(1)
   430  	e.IndexGarbageCollected.Store(true)
   431  
   432  	if e != entry {
   433  		entry.metrics.gcNeedsReconcile.Inc(1)
   434  	} else {
   435  		entry.metrics.gcNoReconcile.Inc(1)
   436  	}
   437  
   438  	return true
   439  }
   440  
   441  // mergeInto merges this entry's index block states into the provided index series.
   442  func (entry *Entry) mergeInto(indexSeries doc.OnIndexSeries) {
   443  	if entry == indexSeries {
   444  		// NB: short circuit if attempting to merge an entry into itself.
   445  		return
   446  	}
   447  
   448  	entry.reverseIndex.RLock()
   449  	indexSeries.MergeEntryIndexBlockStates(entry.reverseIndex.states)
   450  	entry.reverseIndex.RUnlock()
   451  }
   452  
   453  // TryReconcileDuplicates attempts to reconcile the index states of this entry.
   454  func (entry *Entry) TryReconcileDuplicates() {
   455  	// Since series insertions + index insertions are done separately and asynchronously, it is
   456  	// possible for a series to be in the index but not have data written yet, so we cannot yet
   457  	// consider empty any series that is not yet in the lookup.
   458  	e, _, err := entry.Shard.TryRetrieveSeriesAndIncrementReaderWriterCount(entry.ID)
   459  	if err != nil || e == nil {
   460  		return
   461  	}
   462  
   463  	if e != entry {
   464  		entry.mergeInto(e)
   465  		entry.metrics.duplicateNeedsReconcile.Inc(1)
   466  	} else {
   467  		entry.metrics.duplicateNoReconcile.Inc(1)
   468  	}
   469  
   470  	e.DecrementReaderWriterCount()
   471  }
   472  
   473  // NeedsIndexGarbageCollected checks if the entry is eligible to be garbage collected
   474  // from the index. If so, returns true. Otherwise returns false.
   475  func (entry *Entry) NeedsIndexGarbageCollected() bool {
   476  	// This is a cheaper check than loading the entry from the shard again,
   477  	// which makes it suitable to run frequently.
   478  	// It may not be as accurate, but it's fine for an approximation since
   479  	// only a single series in a segment needs to return true to trigger an
   480  	// index segment to be garbage collected.
   481  	if entry.insertTime.Load() == 0 {
   482  		return false // Not inserted, does not need garbage collection.
   483  	}
   484  
   485  	// NB(antanas): Entries need to be GC'ed for closed shards.
   486  	// Orphan entries will cause problems if the same shard returns to the same node.
   487  	if entry.Shard.Closed() {
   488  		return true
   489  	}
   490  	// Check that a write is not potentially pending and the series is empty.
   491  	return entry.ReaderWriterCount() == 0 && entry.Series.IsEmpty()
   492  }
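
// Illustrative sketch: an index GC pass can use the cheap
// NeedsIndexGarbageCollected filter first and only then perform the
// authoritative TryMarkIndexGarbageCollected check, which reconciles against
// the shard. The helper name is hypothetical.
func garbageCollectableEntries(entries []*Entry) []*Entry {
	collected := make([]*Entry, 0, len(entries))
	for _, e := range entries {
		if !e.NeedsIndexGarbageCollected() {
			continue
		}
		if e.TryMarkIndexGarbageCollected() {
			collected = append(collected, e)
		}
	}
	return collected
}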
   493  
   494  // SetInsertTime marks the entry as having been inserted into the shard at a given timestamp.
   495  func (entry *Entry) SetInsertTime(t time.Time) {
   496  	entry.insertTime.Store(t.UnixNano())
   497  }
   498  
   499  // Write writes a new value.
   500  func (entry *Entry) Write(
   501  	ctx context.Context,
   502  	timestamp xtime.UnixNano,
   503  	value float64,
   504  	unit xtime.Unit,
   505  	annotation []byte,
   506  	wOpts series.WriteOptions,
   507  ) (bool, series.WriteType, error) {
   508  	if err := entry.maybeIndex(timestamp); err != nil {
   509  		return false, 0, err
   510  	}
   511  	return entry.Series.Write(
   512  		ctx,
   513  		timestamp,
   514  		value,
   515  		unit,
   516  		annotation,
   517  		wOpts,
   518  	)
   519  }
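
// Illustrative sketch: a write through the Entry first ensures the series is
// indexed for the write's index block (at most one attempt at a time via
// maybeIndex) and then forwards the datapoint to the in-memory series. The
// helper name and the zero-valued write options are hypothetical.
func writeSample(
	ctx context.Context,
	entry *Entry,
	at xtime.UnixNano,
	value float64,
) error {
	_, _, err := entry.Write(ctx, at, value, xtime.Second, nil, series.WriteOptions{})
	return err
}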
   520  
   521  // LoadBlock loads a single block into the series.
   522  func (entry *Entry) LoadBlock(
   523  	block block.DatabaseBlock,
   524  	writeType series.WriteType,
   525  ) error {
   526  	// TODO(bodu): We can remove this once we have index snapshotting as index snapshots will
   527  	// contain snapshotted index segments that cover snapshotted data.
   528  	if err := entry.maybeIndex(block.StartTime()); err != nil {
   529  		return err
   530  	}
   531  	return entry.Series.LoadBlock(block, writeType)
   532  }
   533  
   534  // UniqueIndex is the unique index for the series.
   535  func (entry *Entry) UniqueIndex() uint64 {
   536  	return entry.Series.UniqueIndex()
   537  }
   538  
   539  func (entry *Entry) maybeIndex(timestamp xtime.UnixNano) error {
   540  	idx := entry.indexWriter
   541  	if idx == nil {
   542  		return nil
   543  	}
   544  	if !entry.NeedsIndexUpdate(idx.BlockStartForWriteTime(timestamp)) {
   545  		return nil
   546  	}
   547  	entry.pendingIndexBatchSizeOne[0] = writes.PendingIndexInsert{
   548  		Entry: index.WriteBatchEntry{
   549  			Timestamp:     timestamp,
   550  			OnIndexSeries: entry,
   551  			EnqueuedAt:    entry.nowFn(),
   552  		},
   553  		Document: entry.Series.Metadata(),
   554  	}
   555  	entry.OnIndexPrepare(idx.BlockStartForWriteTime(timestamp))
   556  	return idx.WritePending(entry.pendingIndexBatchSizeOne)
   557  }
   558  
   559  // SeriesRef returns the series read write ref.
   560  func (entry *Entry) SeriesRef() (bootstrap.SeriesRef, error) {
   561  	return entry, nil
   562  }
   563  
   564  // ReleaseRef must be called after using the series ref
   565  // to release the reference count to the series so it can
   566  // be expired by the owning shard eventually.
   567  func (entry *Entry) ReleaseRef() {
   568  	entry.DecrementReaderWriterCount()
   569  }
   570  
   571  // entryIndexState is used to capture the state of indexing for a single shard
   572  // entry. It's used to prevent redundant indexing operations.
   573  // NB(prateek): We need this amount of state because, in the worst case, we can have 3 active blocks being
   574  // written to. Albeit that's an edge case due to bad configuration. Even outside of that, 2 blocks can
   575  // be written to due to delayed, out of order writes. Consider an index block size of 2h, and buffer
   576  // past of 10m. Say a write comes in at 2.05p (wallclock) for 2.05p (timestamp in the write), we'd index
   577  // the entry, and update the entry to have a success for 4p. Now imagine another write
   578  // comes in at 2.06p (wallclock) for 1.57p (timestamp in the write). We need to differentiate that we don't
   579  // have a write for the 12-2p block from the 2-4p block, or we'd drop the late write.
   580  type entryIndexState struct {
   581  	sync.RWMutex
   582  	states                   doc.EntryIndexBlockStates
   583  	minIndexedT, maxIndexedT xtime.UnixNano
   584  }
   585  
   586  func newEntryIndexState() entryIndexState {
   587  	return entryIndexState{
   588  		states: make(doc.EntryIndexBlockStates, 4),
   589  	}
   590  }
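
// Worked sketch of the scenario in the entryIndexState comment above: with a
// 2h index block size, an on-time write stamped 2.05pm lands in the 2pm-4pm
// block while a late write stamped 1.57pm lands in the 12pm-2pm block, so the
// entry must track both block starts at once. The concrete date and helper
// name are hypothetical.
func exampleDistinctBlockStarts() (xtime.UnixNano, xtime.UnixNano) {
	blockSize := 2 * time.Hour
	onTime := xtime.ToUnixNano(time.Date(2023, 1, 1, 14, 5, 0, 0, time.UTC))
	late := xtime.ToUnixNano(time.Date(2023, 1, 1, 13, 57, 0, 0, time.UTC))
	// Truncating to the block size yields 14:00 and 12:00 respectively.
	return onTime.Truncate(blockSize), late.Truncate(blockSize)
}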
   591  
   592  func (s *entryIndexState) indexedRangeWithRLock() (xtime.UnixNano, xtime.UnixNano) {
   593  	return s.minIndexedT, s.maxIndexedT
   594  }
   595  
   596  func (s *entryIndexState) indexedWithRLock(t xtime.UnixNano) bool {
   597  	v, ok := s.states[t]
   598  	if ok {
   599  		return v.Success
   600  	}
   601  	return false
   602  }
   603  
   604  func (s *entryIndexState) indexedOrAttemptedWithRLock(t xtime.UnixNano) bool {
   605  	v, ok := s.states[t]
   606  	if ok {
   607  		return v.Success || v.Attempt
   608  	}
   609  	return false
   610  }
   611  
   612  func (s *entryIndexState) setSuccessWithWLock(t xtime.UnixNano) {
   613  	if s.indexedWithRLock(t) {
   614  		return
   615  	}
   616  
   617  	// NB(r): If no state has been inserted yet, we need to make an insertion here;
   618  	// this happens when indexing synchronously and NeedsIndexUpdate wasn't called
   619  	// before we indexed the series.
   620  	s.states[t] = doc.EntryIndexBlockState{
   621  		Success: true,
   622  	}
   623  
   624  	if t > s.maxIndexedT {
   625  		s.maxIndexedT = t
   626  	}
   627  	if t < s.minIndexedT || s.minIndexedT == 0 {
   628  		s.minIndexedT = t
   629  	}
   630  }
   631  
   632  func (s *entryIndexState) setAttemptWithWLock(t xtime.UnixNano, attempt bool) {
   633  	v, ok := s.states[t]
   634  	if ok {
   635  		if v.Success {
   636  			return // Attempt is not relevant if success.
   637  		}
   638  		v.Attempt = attempt
   639  		s.states[t] = v
   640  		return
   641  	}
   642  
   643  	s.states[t] = doc.EntryIndexBlockState{
   644  		Attempt: attempt,
   645  	}
   646  }