github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/bootstrap/result/result_index.go

github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/bootstrap/result/result_index.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package result
    22  
    23  import (
    24  	"fmt"
    25  
    26  	"github.com/m3db/m3/src/dbnode/namespace"
    27  	"github.com/m3db/m3/src/m3ninx/doc"
    28  	"github.com/m3db/m3/src/m3ninx/index"
    29  	"github.com/m3db/m3/src/m3ninx/index/segment"
    30  	"github.com/m3db/m3/src/m3ninx/index/segment/builder"
    31  	"github.com/m3db/m3/src/m3ninx/persist"
    32  	xtime "github.com/m3db/m3/src/x/time"
    33  )
    34  
    35  // NewDefaultDocumentsBuilderAllocator returns a default mutable segment
    36  // allocator.
    37  func NewDefaultDocumentsBuilderAllocator() DocumentsBuilderAllocator {
    38  	return func() (segment.DocumentsBuilder, error) {
    39  		return builder.NewBuilderFromDocuments(builder.NewOptions())
    40  	}
    41  }
    42  
// indexBootstrapResult is the implementation returned by
// NewIndexBootstrapResult. It pairs the index results gathered so far
// (results) with the shard time ranges that are still unfulfilled
// (unfulfilled); Add grows both of them.
type indexBootstrapResult struct {
	results     IndexResults
	unfulfilled ShardTimeRanges
}
    47  
    48  // NewIndexBootstrapResult returns a new index bootstrap result.
    49  func NewIndexBootstrapResult() IndexBootstrapResult {
    50  	return &indexBootstrapResult{
    51  		results:     make(IndexResults),
    52  		unfulfilled: NewShardTimeRanges(),
    53  	}
    54  }
    55  
    56  func (r *indexBootstrapResult) IndexResults() IndexResults {
    57  	return r.results
    58  }
    59  
    60  func (r *indexBootstrapResult) Unfulfilled() ShardTimeRanges {
    61  	return r.unfulfilled
    62  }
    63  
    64  func (r *indexBootstrapResult) SetUnfulfilled(unfulfilled ShardTimeRanges) {
    65  	r.unfulfilled = unfulfilled
    66  }
    67  
    68  func (r *indexBootstrapResult) Add(blocks IndexBlockByVolumeType, unfulfilled ShardTimeRanges) {
    69  	r.results.Add(blocks)
    70  	r.unfulfilled.AddRanges(unfulfilled)
    71  }
    72  
    73  func (r *indexBootstrapResult) NumSeries() int {
    74  	var size int64
    75  	for _, blockByVolumeType := range r.results {
    76  		for _, b := range blockByVolumeType.data {
    77  			for _, s := range b.segments {
    78  				size += s.Segment().Size()
    79  			}
    80  		}
    81  	}
    82  	return int(size)
    83  }
    84  
    85  // NewIndexBuilder creates a wrapped locakble index seg builder.
    86  func NewIndexBuilder(builder segment.DocumentsBuilder) *IndexBuilder {
    87  	return &IndexBuilder{
    88  		builder: builder,
    89  	}
    90  }
    91  
    92  // FlushBatch flushes a batch of documents to the underlying segment builder.
    93  func (b *IndexBuilder) FlushBatch(batch []doc.Metadata) ([]doc.Metadata, error) {
    94  	if len(batch) == 0 {
    95  		// Last flush might not have any docs enqueued
    96  		return batch, nil
    97  	}
    98  
    99  	// NB(bodu): Prevent concurrent writes.
   100  	// Although it seems like there's no need to lock on writes since
   101  	// each block should ONLY be getting built in a single thread.
   102  	err := b.builder.InsertBatch(index.Batch{
   103  		Docs:                batch,
   104  		AllowPartialUpdates: true,
   105  	})
   106  	if err != nil && index.IsBatchPartialError(err) {
   107  		// If after filtering out duplicate ID errors
   108  		// there are no errors, then this was a successful
   109  		// insertion.
   110  		batchErr := err.(*index.BatchPartialError)
   111  		// NB(r): FilterDuplicateIDErrors returns nil
   112  		// if no errors remain after filtering duplicate ID
   113  		// errors, this case is covered in unit tests.
   114  		err = batchErr.FilterDuplicateIDErrors()
   115  	}
   116  	if err != nil {
   117  		return batch, err
   118  	}
   119  
   120  	// Reset docs batch for reuse
   121  	var empty doc.Metadata
   122  	for i := range batch {
   123  		batch[i] = empty
   124  	}
   125  	batch = batch[:0]
   126  	return batch, nil
   127  }
   128  
   129  // Builder returns the underlying index segment docs builder.
   130  func (b *IndexBuilder) Builder() segment.DocumentsBuilder {
   131  	return b.builder
   132  }
   133  
   134  // AddBlockIfNotExists adds an index block if it does not already exist to the index results.
   135  func (r IndexResults) AddBlockIfNotExists(
   136  	t xtime.UnixNano,
   137  	idxopts namespace.IndexOptions,
   138  ) {
   139  	// NB(r): The reason we can align by the retention block size and guarantee
   140  	// there is only one entry for this time is because index blocks must be a
   141  	// positive multiple of the data block size, making it easy to map a data
   142  	// block entry to at most one index block entry.
   143  	blockStart := t.Truncate(idxopts.BlockSize())
   144  	blockStartNanos := blockStart
   145  
   146  	_, exists := r[blockStartNanos]
   147  	if !exists {
   148  		r[blockStartNanos] = NewIndexBlockByVolumeType(blockStart)
   149  	}
   150  }
   151  
   152  // Add will add an index block to the collection, merging if one already
   153  // exists.
   154  func (r IndexResults) Add(blocks IndexBlockByVolumeType) {
   155  	if blocks.BlockStart().IsZero() {
   156  		return
   157  	}
   158  
   159  	// Merge results
   160  	blockStart := blocks.BlockStart()
   161  	existing, ok := r[blockStart]
   162  	if !ok {
   163  		r[blockStart] = blocks
   164  		return
   165  	}
   166  
   167  	r[blockStart] = existing.Merged(blocks)
   168  }
   169  
   170  // AddResults will add another set of index results to the collection, merging
   171  // if index blocks already exists.
   172  func (r IndexResults) AddResults(other IndexResults) {
   173  	for _, blocks := range other {
   174  		r.Add(blocks)
   175  	}
   176  }
   177  
   178  // MarkFulfilled will mark an index block as fulfilled, either partially or
   179  // wholly as specified by the shard time ranges passed.
   180  func (r IndexResults) MarkFulfilled(
   181  	t xtime.UnixNano,
   182  	fulfilled ShardTimeRanges,
   183  	indexVolumeType persist.IndexVolumeType,
   184  	idxopts namespace.IndexOptions,
   185  ) error {
   186  	// NB(r): The reason we can align by the retention block size and guarantee
   187  	// there is only one entry for this time is because index blocks must be a
   188  	// positive multiple of the data block size, making it easy to map a data
   189  	// block entry to at most one index block entry.
   190  	blockStart := t.Truncate(idxopts.BlockSize())
   191  	blockStartNanos := blockStart
   192  
   193  	blockRange := xtime.Range{
   194  		Start: blockStart,
   195  		End:   blockStart.Add(idxopts.BlockSize()),
   196  	}
   197  
   198  	// First check fulfilled is correct
   199  	min, max := fulfilled.MinMax()
   200  	if min.Before(blockRange.Start) || max.After(blockRange.End) {
   201  		return fmt.Errorf("fulfilled range %s is outside of index block range: %s",
   202  			fulfilled.SummaryString(), blockRange.String())
   203  	}
   204  
   205  	blocks, exists := r[blockStartNanos]
   206  	if !exists {
   207  		blocks = NewIndexBlockByVolumeType(blockStart)
   208  		r[blockStartNanos] = blocks
   209  	}
   210  
   211  	block, exists := blocks.data[indexVolumeType]
   212  	if !exists {
   213  		block = NewIndexBlock(nil, nil)
   214  		blocks.data[indexVolumeType] = block
   215  	}
   216  	blocks.data[indexVolumeType] = block.Merged(NewIndexBlock(nil, fulfilled))
   217  	return nil
   218  }
   219  
   220  // MergedIndexBootstrapResult returns a merged result of two bootstrap results.
   221  // It is a mutating function that mutates the larger result by adding the
   222  // smaller result to it and then finally returns the mutated result.
   223  func MergedIndexBootstrapResult(i, j IndexBootstrapResult) IndexBootstrapResult {
   224  	if i == nil {
   225  		return j
   226  	}
   227  	if j == nil {
   228  		return i
   229  	}
   230  	sizeI, sizeJ := 0, 0
   231  	for _, ir := range i.IndexResults() {
   232  		for _, b := range ir.data {
   233  			sizeI += len(b.Segments())
   234  		}
   235  	}
   236  	for _, ir := range j.IndexResults() {
   237  		for _, b := range ir.data {
   238  			sizeJ += len(b.Segments())
   239  		}
   240  	}
   241  	if sizeI >= sizeJ {
   242  		i.IndexResults().AddResults(j.IndexResults())
   243  		i.Unfulfilled().AddRanges(j.Unfulfilled())
   244  		return i
   245  	}
   246  	j.IndexResults().AddResults(i.IndexResults())
   247  	j.Unfulfilled().AddRanges(i.Unfulfilled())
   248  	return j
   249  }
   250  
   251  // NewIndexBlock returns a new bootstrap index block result.
   252  func NewIndexBlock(
   253  	segments []Segment,
   254  	fulfilled ShardTimeRanges,
   255  ) IndexBlock {
   256  	if fulfilled == nil {
   257  		fulfilled = NewShardTimeRanges()
   258  	}
   259  	return IndexBlock{
   260  		segments:  segments,
   261  		fulfilled: fulfilled,
   262  	}
   263  }
   264  
   265  // Segments returns the segments.
   266  func (b IndexBlock) Segments() []Segment {
   267  	return b.segments
   268  }
   269  
   270  // Fulfilled returns the fulfilled time ranges by this index block.
   271  func (b IndexBlock) Fulfilled() ShardTimeRanges {
   272  	return b.fulfilled
   273  }
   274  
   275  // Merged returns a new merged index block, currently it just appends the
   276  // list of segments from the other index block and the caller merges
   277  // as they see necessary.
   278  func (b IndexBlock) Merged(other IndexBlock) IndexBlock {
   279  	r := b
   280  	if len(other.segments) > 0 {
   281  		r.segments = append(r.segments, other.segments...)
   282  	}
   283  	if !other.fulfilled.IsEmpty() {
   284  		r.fulfilled = b.fulfilled.Copy()
   285  		r.fulfilled.AddRanges(other.fulfilled)
   286  	}
   287  	return r
   288  }
   289  
   290  // NewIndexBlockByVolumeType returns a new bootstrap index blocks by volume type result.
   291  func NewIndexBlockByVolumeType(blockStart xtime.UnixNano) IndexBlockByVolumeType {
   292  	return IndexBlockByVolumeType{
   293  		blockStart: blockStart,
   294  		data:       make(map[persist.IndexVolumeType]IndexBlock),
   295  	}
   296  }
   297  
   298  // BlockStart returns the block start.
   299  func (b IndexBlockByVolumeType) BlockStart() xtime.UnixNano {
   300  	return b.blockStart
   301  }
   302  
   303  // GetBlock returns an IndexBlock for volumeType.
   304  func (b IndexBlockByVolumeType) GetBlock(volumeType persist.IndexVolumeType) (IndexBlock, bool) {
   305  	block, ok := b.data[volumeType]
   306  	return block, ok
   307  }
   308  
   309  // SetBlock sets an IndexBlock for volumeType.
   310  func (b IndexBlockByVolumeType) SetBlock(volumeType persist.IndexVolumeType, block IndexBlock) {
   311  	b.data[volumeType] = block
   312  }
   313  
   314  // Iter returns the underlying iterable map data.
   315  func (b IndexBlockByVolumeType) Iter() map[persist.IndexVolumeType]IndexBlock {
   316  	return b.data
   317  }
   318  
   319  // Merged returns a new merged index block by volume type.
   320  // It merges the underlying index blocks together by index volume type.
   321  func (b IndexBlockByVolumeType) Merged(other IndexBlockByVolumeType) IndexBlockByVolumeType {
   322  	r := b
   323  	for volumeType, otherBlock := range other.data {
   324  		existing, ok := r.data[volumeType]
   325  		if !ok {
   326  			r.data[volumeType] = otherBlock
   327  			continue
   328  		}
   329  		r.data[volumeType] = existing.Merged(otherBlock)
   330  	}
   331  	return r
   332  }