github.com/m3db/m3@v1.5.0/src/dbnode/storage/block/block.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package block
    22  
    23  import (
    24  	"errors"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"github.com/m3db/m3/src/dbnode/namespace"
    30  	"github.com/m3db/m3/src/dbnode/ts"
    31  	"github.com/m3db/m3/src/dbnode/x/xio"
    32  	"github.com/m3db/m3/src/x/context"
    33  	"github.com/m3db/m3/src/x/ident"
    34  	xtime "github.com/m3db/m3/src/x/time"
    35  )
    36  
    37  var (
    38  	errReadFromClosedBlock       = errors.New("attempt to read from a closed block")
    39  	errTriedToMergeBlockFromDisk = errors.New("[invariant violated] tried to merge a block that was retrieved from disk")
    40  
    41  	timeZero = xtime.UnixNano(0)
    42  )
    43  
// dbBlock is the DatabaseBlock implementation returned by NewDatabaseBlock.
// It holds one segment of data for the block that begins at startUnixNanos
// and, optionally, a merge target that Stream and Checksum merge in lazily.
type dbBlock struct {
	// The exported methods take this lock around their reads and writes of
	// the fields below; lastReadUnixNanos is the exception (see below).
	sync.RWMutex

	// Namespace context, options, block start, the current segment, and the
	// length of that segment (refreshed whenever the segment is replaced).
	nsCtx          namespace.Context
	opts           Options
	startUnixNanos xtime.UnixNano
	segment        ts.Segment
	length         int

	blockSize time.Duration

	// lastReadUnixNanos is only read and written through sync/atomic so that
	// recording a read does not need the write lock.
	lastReadUnixNanos int64

	// mergeTarget, when non-nil, is a block whose data still has to be merged
	// into this one. It is set by Merge and cleared (and closed) by
	// resetMergeTargetWithLock.
	mergeTarget DatabaseBlock

	// seriesID is set by ResetFromDisk and cleared to nil every time the
	// segment is replaced.
	seriesID ident.ID

	// onEvicted is the callback stored by SetOnEvictedFromWiredList.
	onEvicted OnEvictedFromWiredList

	// listState contains state that the Wired List requires in order to track a block's
	// position in the wired list. All the state in this struct is "owned" by the wired
	// list and should only be accessed by the Wired List itself. Does not require any
	// synchronization because the WiredList is not concurrent.
	listState listState

	// checksum is the checksum of segment, recalculated whenever the segment
	// is replaced.
	checksum uint32

	// wasRetrievedFromDisk is set by ResetFromDisk and cleared whenever the
	// segment is replaced; closed is set when the block is closed/discarded
	// and cleared again when the block is reset.
	wasRetrievedFromDisk bool
	closed               bool
}
    74  
    75  type listState struct {
    76  	next                  DatabaseBlock
    77  	prev                  DatabaseBlock
    78  	enteredListAtUnixNano int64
    79  }
    80  
    81  // NewDatabaseBlock creates a new DatabaseBlock instance.
    82  func NewDatabaseBlock(
    83  	start xtime.UnixNano,
    84  	blockSize time.Duration,
    85  	segment ts.Segment,
    86  	opts Options,
    87  	nsCtx namespace.Context,
    88  ) DatabaseBlock {
    89  	b := &dbBlock{
    90  		nsCtx:          nsCtx,
    91  		opts:           opts,
    92  		startUnixNanos: start,
    93  		blockSize:      blockSize,
    94  		closed:         false,
    95  	}
    96  	if segment.Len() > 0 {
    97  		b.resetSegmentWithLock(segment)
    98  	}
    99  	return b
   100  }
   101  
   102  func (b *dbBlock) StartTime() xtime.UnixNano {
   103  	b.RLock()
   104  	start := b.startWithRLock()
   105  	b.RUnlock()
   106  	return start
   107  }
   108  
   109  func (b *dbBlock) BlockSize() time.Duration {
   110  	b.RLock()
   111  	size := b.blockSize
   112  	b.RUnlock()
   113  	return size
   114  }
   115  
   116  func (b *dbBlock) startWithRLock() xtime.UnixNano {
   117  	return b.startUnixNanos
   118  }
   119  
   120  func (b *dbBlock) SetLastReadTime(value xtime.UnixNano) {
   121  	// Use an int64 to avoid needing a write lock for
   122  	// this high frequency called method (i.e. each individual
   123  	// read needing a write lock would be excessive)
   124  	atomic.StoreInt64(&b.lastReadUnixNanos, int64(value))
   125  }
   126  
   127  func (b *dbBlock) LastReadTime() xtime.UnixNano {
   128  	return xtime.UnixNano(atomic.LoadInt64(&b.lastReadUnixNanos))
   129  }
   130  
   131  func (b *dbBlock) Empty() bool {
   132  	b.RLock()
   133  	empty := b.length == 0
   134  	b.RUnlock()
   135  	return empty
   136  }
   137  
   138  func (b *dbBlock) Len() int {
   139  	b.RLock()
   140  	length := b.length
   141  	b.RUnlock()
   142  	return length
   143  }
   144  
   145  func (b *dbBlock) Checksum() (uint32, error) {
   146  	b.RLock()
   147  	checksum := b.checksum
   148  	hasMergeTarget := b.mergeTarget != nil
   149  	b.RUnlock()
   150  
   151  	if !hasMergeTarget {
   152  		return checksum, nil
   153  	}
   154  
   155  	b.Lock()
   156  	defer b.Unlock()
   157  	// Since we released the lock temporarily we need to check again.
   158  	hasMergeTarget = b.mergeTarget != nil
   159  	if !hasMergeTarget {
   160  		return b.checksum, nil
   161  	}
   162  
   163  	tempCtx := b.opts.ContextPool().Get()
   164  
   165  	stream, err := b.streamWithRLock(tempCtx)
   166  	if err != nil {
   167  		return 0, err
   168  	}
   169  
   170  	// This will merge the existing stream with the merge target's stream,
   171  	// as well as recalculate and store the new checksum.
   172  	err = b.forceMergeWithLock(tempCtx, stream)
   173  	if err != nil {
   174  		return 0, err
   175  	}
   176  
   177  	return b.checksum, nil
   178  }
   179  
   180  func (b *dbBlock) Stream(blocker context.Context) (xio.BlockReader, error) {
   181  	lockUpgraded := false
   182  
   183  	b.RLock()
   184  	defer func() {
   185  		if lockUpgraded {
   186  			b.Unlock()
   187  		} else {
   188  			b.RUnlock()
   189  		}
   190  	}()
   191  
   192  	if b.closed {
   193  		return xio.EmptyBlockReader, errReadFromClosedBlock
   194  	}
   195  
   196  	if b.mergeTarget == nil {
   197  		return b.streamWithRLock(blocker)
   198  	}
   199  
   200  	b.RUnlock()
   201  	lockUpgraded = true
   202  	b.Lock()
   203  
   204  	// Need to re-check everything since we upgraded the lock.
   205  	if b.closed {
   206  		return xio.EmptyBlockReader, errReadFromClosedBlock
   207  	}
   208  
   209  	stream, err := b.streamWithRLock(blocker)
   210  	if err != nil {
   211  		return xio.EmptyBlockReader, err
   212  	}
   213  
   214  	if b.mergeTarget == nil {
   215  		return stream, nil
   216  	}
   217  
   218  	// This will merge the existing stream with the merge target's stream,
   219  	// as well as recalculate and store the new checksum.
   220  	err = b.forceMergeWithLock(blocker, stream)
   221  	if err != nil {
   222  		return xio.EmptyBlockReader, err
   223  	}
   224  
   225  	// This will return a copy of the data so that it is still safe to
   226  	// close the block after calling this method.
   227  	return b.streamWithRLock(blocker)
   228  }
   229  
   230  func (b *dbBlock) HasMergeTarget() bool {
   231  	b.RLock()
   232  	hasMergeTarget := b.mergeTarget != nil
   233  	b.RUnlock()
   234  	return hasMergeTarget
   235  }
   236  
   237  func (b *dbBlock) WasRetrievedFromDisk() bool {
   238  	b.RLock()
   239  	wasRetrieved := b.wasRetrievedFromDisk
   240  	b.RUnlock()
   241  	return wasRetrieved
   242  }
   243  
   244  func (b *dbBlock) Merge(other DatabaseBlock) error {
   245  	b.Lock()
   246  	if b.wasRetrievedFromDisk || other.WasRetrievedFromDisk() {
   247  		// We use Merge to lazily merge blocks that eventually need to be flushed to disk
   248  		// If we try to perform a merge on blocks that were retrieved from disk then we've
   249  		// violated an invariant and probably have a bug that is causing data loss.
   250  		b.Unlock()
   251  		return errTriedToMergeBlockFromDisk
   252  	}
   253  
   254  	if b.mergeTarget == nil {
   255  		b.mergeTarget = other
   256  	} else {
   257  		b.mergeTarget.Merge(other)
   258  	}
   259  
   260  	b.Unlock()
   261  	return nil
   262  }
   263  
   264  func (b *dbBlock) Reset(
   265  	start xtime.UnixNano,
   266  	blockSize time.Duration,
   267  	segment ts.Segment,
   268  	nsCtx namespace.Context,
   269  ) {
   270  	b.Lock()
   271  	defer b.Unlock()
   272  	b.resetNewBlockStartWithLock(start, blockSize)
   273  	b.resetSegmentWithLock(segment)
   274  	b.nsCtx = nsCtx
   275  }
   276  
   277  func (b *dbBlock) ResetFromDisk(
   278  	start xtime.UnixNano,
   279  	blockSize time.Duration,
   280  	segment ts.Segment,
   281  	id ident.ID,
   282  	nsCtx namespace.Context,
   283  ) {
   284  	b.Lock()
   285  	defer b.Unlock()
   286  	b.resetNewBlockStartWithLock(start, blockSize)
   287  	// resetSegmentWithLock sets seriesID to nil
   288  	b.resetSegmentWithLock(segment)
   289  	b.seriesID = id
   290  	b.nsCtx = nsCtx
   291  	b.wasRetrievedFromDisk = true
   292  }
   293  
   294  func (b *dbBlock) streamWithRLock(ctx context.Context) (xio.BlockReader, error) {
   295  	start := b.startWithRLock()
   296  
   297  	// Take a copy to avoid heavy depends on cycle
   298  	segmentReader := b.opts.SegmentReaderPool().Get()
   299  	data := b.opts.BytesPool().Get(b.segment.Len())
   300  	data.IncRef()
   301  	if b.segment.Head != nil {
   302  		data.AppendAll(b.segment.Head.Bytes())
   303  	}
   304  	if b.segment.Tail != nil {
   305  		data.AppendAll(b.segment.Tail.Bytes())
   306  	}
   307  	data.DecRef()
   308  	checksum := b.segment.CalculateChecksum()
   309  	segmentReader.Reset(ts.NewSegment(data, nil, checksum, ts.FinalizeHead))
   310  	ctx.RegisterFinalizer(segmentReader)
   311  
   312  	blockReader := xio.BlockReader{
   313  		SegmentReader: segmentReader,
   314  		Start:         start,
   315  		BlockSize:     b.blockSize,
   316  	}
   317  
   318  	return blockReader, nil
   319  }
   320  
   321  func (b *dbBlock) forceMergeWithLock(ctx context.Context, stream xio.SegmentReader) error {
   322  	targetStream, err := b.mergeTarget.Stream(ctx)
   323  	if err != nil {
   324  		return err
   325  	}
   326  	start := b.startWithRLock()
   327  	mergedBlockReader := newDatabaseMergedBlockReader(b.nsCtx, start, b.blockSize,
   328  		mergeableStream{stream: stream, finalize: false},       // Should have been marked for finalization by the caller
   329  		mergeableStream{stream: targetStream, finalize: false}, // Already marked for finalization by the Stream() call above
   330  		b.opts)
   331  	mergedSegment, err := mergedBlockReader.Segment()
   332  	if err != nil {
   333  		return err
   334  	}
   335  
   336  	b.resetMergeTargetWithLock()
   337  	b.resetSegmentWithLock(mergedSegment)
   338  	return nil
   339  }
   340  
   341  func (b *dbBlock) resetNewBlockStartWithLock(start xtime.UnixNano, blockSize time.Duration) {
   342  	b.startUnixNanos = start
   343  	b.blockSize = blockSize
   344  	atomic.StoreInt64(&b.lastReadUnixNanos, 0)
   345  	b.closed = false
   346  	b.resetMergeTargetWithLock()
   347  }
   348  
   349  func (b *dbBlock) resetSegmentWithLock(seg ts.Segment) {
   350  	b.segment = seg
   351  	b.length = seg.Len()
   352  	b.checksum = seg.CalculateChecksum()
   353  	b.seriesID = nil
   354  	b.wasRetrievedFromDisk = false
   355  }
   356  
   357  func (b *dbBlock) Discard() ts.Segment {
   358  	seg, _ := b.closeAndDiscardConditionally(nil)
   359  	return seg
   360  }
   361  
   362  func (b *dbBlock) Close() {
   363  	segment, _ := b.closeAndDiscardConditionally(nil)
   364  	segment.Finalize()
   365  }
   366  
   367  func (b *dbBlock) CloseIfFromDisk() bool {
   368  	segment, ok := b.closeAndDiscardConditionally(func(b *dbBlock) bool {
   369  		return b.wasRetrievedFromDisk
   370  	})
   371  	if !ok {
   372  		return false
   373  	}
   374  	segment.Finalize()
   375  	return true
   376  }
   377  
   378  func (b *dbBlock) closeAndDiscardConditionally(condition func(b *dbBlock) bool) (ts.Segment, bool) {
   379  	b.Lock()
   380  
   381  	if condition != nil && !condition(b) {
   382  		b.Unlock()
   383  		return ts.Segment{}, false
   384  	}
   385  
   386  	if b.closed {
   387  		b.Unlock()
   388  		return ts.Segment{}, true
   389  	}
   390  
   391  	segment := b.segment
   392  	b.closed = true
   393  
   394  	b.resetMergeTargetWithLock()
   395  	b.Unlock()
   396  
   397  	if pool := b.opts.DatabaseBlockPool(); pool != nil {
   398  		pool.Put(b)
   399  	}
   400  
   401  	return segment, true
   402  }
   403  
   404  func (b *dbBlock) resetMergeTargetWithLock() {
   405  	if b.mergeTarget != nil {
   406  		b.mergeTarget.Close()
   407  	}
   408  	b.mergeTarget = nil
   409  }
   410  
   411  // Should only be used by the WiredList.
   412  func (b *dbBlock) next() DatabaseBlock {
   413  	return b.listState.next
   414  }
   415  
   416  // Should only be used by the WiredList.
   417  func (b *dbBlock) setNext(value DatabaseBlock) {
   418  	b.listState.next = value
   419  }
   420  
   421  // Should only be used by the WiredList.
   422  func (b *dbBlock) prev() DatabaseBlock {
   423  	return b.listState.prev
   424  }
   425  
   426  // Should only be used by the WiredList.
   427  func (b *dbBlock) setPrev(value DatabaseBlock) {
   428  	b.listState.prev = value
   429  }
   430  
   431  // Should only be used by the WiredList.
   432  func (b *dbBlock) enteredListAtUnixNano() int64 {
   433  	return b.listState.enteredListAtUnixNano
   434  }
   435  
   436  // Should only be used by the WiredList.
   437  func (b *dbBlock) setEnteredListAtUnixNano(value int64) {
   438  	b.listState.enteredListAtUnixNano = value
   439  }
   440  
   441  // wiredListEntry is a snapshot of a subset of the block's state that the WiredList
   442  // uses to determine if a block is eligible for inclusion in the WiredList.
   443  type wiredListEntry struct {
   444  	seriesID             ident.ID
   445  	startTime            xtime.UnixNano
   446  	closed               bool
   447  	wasRetrievedFromDisk bool
   448  }
   449  
   450  // wiredListEntry generates a wiredListEntry for the block, and should only
   451  // be used by the WiredList.
   452  func (b *dbBlock) wiredListEntry() wiredListEntry {
   453  	b.RLock()
   454  	result := wiredListEntry{
   455  		closed:               b.closed,
   456  		seriesID:             b.seriesID,
   457  		wasRetrievedFromDisk: b.wasRetrievedFromDisk,
   458  		startTime:            b.startWithRLock(),
   459  	}
   460  	b.RUnlock()
   461  	return result
   462  }
   463  
   464  func (b *dbBlock) SetOnEvictedFromWiredList(onEvicted OnEvictedFromWiredList) {
   465  	b.Lock()
   466  	b.onEvicted = onEvicted
   467  	b.Unlock()
   468  }
   469  
   470  func (b *dbBlock) OnEvictedFromWiredList() OnEvictedFromWiredList {
   471  	b.RLock()
   472  	onEvicted := b.onEvicted
   473  	b.RUnlock()
   474  	return onEvicted
   475  }
   476  
   477  type databaseSeriesBlocks struct {
   478  	elems map[xtime.UnixNano]DatabaseBlock
   479  	min   xtime.UnixNano
   480  	max   xtime.UnixNano
   481  }
   482  
   483  // NewDatabaseSeriesBlocks creates a databaseSeriesBlocks instance.
   484  func NewDatabaseSeriesBlocks(capacity int) DatabaseSeriesBlocks {
   485  	return &databaseSeriesBlocks{
   486  		elems: make(map[xtime.UnixNano]DatabaseBlock, capacity),
   487  	}
   488  }
   489  
   490  func (dbb *databaseSeriesBlocks) Len() int {
   491  	return len(dbb.elems)
   492  }
   493  
   494  func (dbb *databaseSeriesBlocks) AddBlock(block DatabaseBlock) {
   495  	start := block.StartTime()
   496  	if dbb.min.Equal(timeZero) || start.Before(dbb.min) {
   497  		dbb.min = start
   498  	}
   499  	if dbb.max.Equal(timeZero) || start.After(dbb.max) {
   500  		dbb.max = start
   501  	}
   502  	dbb.elems[start] = block
   503  }
   504  
   505  func (dbb *databaseSeriesBlocks) AddSeries(other DatabaseSeriesBlocks) {
   506  	if other == nil {
   507  		return
   508  	}
   509  	blocks := other.AllBlocks()
   510  	for _, b := range blocks {
   511  		dbb.AddBlock(b)
   512  	}
   513  }
   514  
   515  // MinTime returns the min time of the blocks contained.
   516  func (dbb *databaseSeriesBlocks) MinTime() xtime.UnixNano {
   517  	return dbb.min
   518  }
   519  
   520  // MaxTime returns the max time of the blocks contained.
   521  func (dbb *databaseSeriesBlocks) MaxTime() xtime.UnixNano {
   522  	return dbb.max
   523  }
   524  
   525  func (dbb *databaseSeriesBlocks) BlockAt(t xtime.UnixNano) (DatabaseBlock, bool) {
   526  	b, ok := dbb.elems[t]
   527  	return b, ok
   528  }
   529  
   530  func (dbb *databaseSeriesBlocks) AllBlocks() map[xtime.UnixNano]DatabaseBlock {
   531  	return dbb.elems
   532  }
   533  
   534  func (dbb *databaseSeriesBlocks) RemoveBlockAt(t xtime.UnixNano) {
   535  	if _, exists := dbb.elems[t]; !exists {
   536  		return
   537  	}
   538  	delete(dbb.elems, t)
   539  	if dbb.min != t && dbb.max != t {
   540  		return
   541  	}
   542  	dbb.min, dbb.max = timeZero, timeZero
   543  	if len(dbb.elems) == 0 {
   544  		return
   545  	}
   546  	for key := range dbb.elems {
   547  		if dbb.min == timeZero || dbb.min > key {
   548  			dbb.min = key
   549  		}
   550  		if dbb.max == timeZero || dbb.max < key {
   551  			dbb.max = key
   552  		}
   553  	}
   554  }
   555  
   556  func (dbb *databaseSeriesBlocks) RemoveAll() {
   557  	for t, block := range dbb.elems {
   558  		block.Close()
   559  		delete(dbb.elems, t)
   560  	}
   561  }
   562  
   563  func (dbb *databaseSeriesBlocks) Reset() {
   564  	// Ensure the old, possibly large map is GC'd
   565  	dbb.elems = nil
   566  	dbb.elems = make(map[xtime.UnixNano]DatabaseBlock)
   567  	dbb.min = 0
   568  	dbb.max = 0
   569  }
   570  
   571  func (dbb *databaseSeriesBlocks) Close() {
   572  	dbb.RemoveAll()
   573  	// Mark the map as nil to prevent maps that have grown large from wasting
   574  	// space in the pool (Deleting elements from a large map will not cause
   575  	// the underlying resources to shrink)
   576  	dbb.elems = nil
   577  }