github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/index_lookup.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fs
    22  
    23  import (
    24  	"bytes"
    25  	"errors"
    26  	"fmt"
    27  
    28  	"github.com/m3db/m3/src/dbnode/digest"
    29  	xmsgpack "github.com/m3db/m3/src/dbnode/persist/fs/msgpack"
    30  	"github.com/m3db/m3/src/x/ident"
    31  	"github.com/m3db/m3/src/x/mmap"
    32  )
    33  
    34  const mmapPersistFsSummariesFileName = "mmap.persist.fs.summariesfile"
    35  
    36  var errCloneShouldNotBeCloned = errors.New("clones should not be cloned")
    37  
// nearestIndexOffsetLookup provides a way of quickly determining the nearest offset of an
// ID in the index file. It is not safe for concurrent use. concurrentClone
// returns additional instances that reference the same tokens and mmap'd bytes.
type nearestIndexOffsetLookup struct {
	// summaryIDsOffsets holds the summary tokens that getNearestIndexFileOffset
	// binary searches. Each token is resolved against summariesMmap.Bytes to
	// obtain the entry's ID and its index file offset.
	summaryIDsOffsets []xmsgpack.IndexSummaryToken
	// summariesMmap: bytes from file mmap'd into anonymous region.
	// isClone: true only for instances produced by concurrentClone. A clone
	// cannot be cloned again and its close() leaves the mmap untouched, since
	// the parent is expected to release it.
	summariesMmap mmap.Descriptor
	isClone       bool
}
    46  
    47  func newNearestIndexOffsetLookup(
    48  	summaryIDsOffsets []xmsgpack.IndexSummaryToken,
    49  	summariesMmap mmap.Descriptor,
    50  ) *nearestIndexOffsetLookup {
    51  	return &nearestIndexOffsetLookup{
    52  		summaryIDsOffsets: summaryIDsOffsets,
    53  		summariesMmap:     summariesMmap,
    54  		isClone:           false,
    55  	}
    56  }
    57  
    58  func (il *nearestIndexOffsetLookup) concurrentClone() (*nearestIndexOffsetLookup, error) {
    59  	if il.isClone {
    60  		return nil, errCloneShouldNotBeCloned
    61  	}
    62  
    63  	return &nearestIndexOffsetLookup{
    64  		summaryIDsOffsets: il.summaryIDsOffsets,
    65  		summariesMmap:     il.summariesMmap,
    66  		isClone:           true,
    67  	}, nil
    68  }
    69  
    70  // getNearestIndexFileOffset returns either:
    71  //     1. The offset in the index file for the specified series
    72  //     2. The offset in the index file for the the series in the summaries file
    73  //        that satisfies the following two constraints:
    74  //            1. Is closest to the desired series in the index file
    75  //            2. Is BEFORE the desired series in the index file (because we
    76  //               we scan the index file sequentially in a forward-moving manner)
    77  // In other words, the returned offset can always be used as a starting point to
    78  // begin scanning the index file for the desired series.
    79  func (il *nearestIndexOffsetLookup) getNearestIndexFileOffset(
    80  	id ident.ID,
    81  	resources ReusableSeekerResources,
    82  ) (int64, error) {
    83  	idBytes := id.Bytes()
    84  
    85  	min := 0
    86  	max := len(il.summaryIDsOffsets) - 1
    87  
    88  	// The summaries file only contains a fraction of the series that are in
    89  	// the index file itself. Because of that, the binary search that we're
    90  	// performing is "optimistic". We're trying to find either an exact match,
    91  	// OR the nearest match that is to the left of the series we're searching
    92  	// for (so we keep track of it everytime we move right). We start with an
    93  	// assumption that the best match so far is at index 0, because in the worst
    94  	// case scenario if we don't find a single "match", then the caller should
    95  	// start at index 0 and scan until they encounter an entry that tells them
    96  	// that the ID they're looking for does not exist (because the IDs in the
    97  	// index are sorted).
    98  	bestMatchSoFar := int64(0)
    99  
   100  	for {
   101  		if min > max {
   102  			return bestMatchSoFar, nil
   103  		}
   104  
   105  		idx := (max + min) / 2
   106  		summaryBytesMetadata := il.summaryIDsOffsets[idx]
   107  		compBytes := summaryBytesMetadata.ID(il.summariesMmap.Bytes)
   108  		comparison := bytes.Compare(idBytes, compBytes)
   109  
   110  		// Found it
   111  		if comparison == 0 {
   112  			indexOffset, err := summaryBytesMetadata.IndexOffset(
   113  				il.summariesMmap.Bytes, resources.byteDecoderStream, resources.msgpackDecoder)
   114  			// Should never happen, either something is really wrong with the code or
   115  			// the file on disk was corrupted
   116  			if err != nil {
   117  				return -1, err
   118  			}
   119  			return indexOffset, nil
   120  		}
   121  
   122  		// idBytes is smaller than compBytes, go left
   123  		if comparison == -1 {
   124  			max = idx - 1
   125  			continue
   126  		}
   127  
   128  		// idBytes is larger than compBytes, go right
   129  		if comparison == 1 {
   130  			min = idx + 1
   131  			indexOffset, err := summaryBytesMetadata.IndexOffset(
   132  				il.summariesMmap.Bytes, resources.byteDecoderStream, resources.msgpackDecoder)
   133  			if err != nil {
   134  				return -1, err
   135  			}
   136  			// update the bestMatchSoFar everytime we move right
   137  			bestMatchSoFar = indexOffset
   138  			continue
   139  		}
   140  	}
   141  }
   142  
   143  func (il *nearestIndexOffsetLookup) close() error {
   144  	// Parent should clean up shared resources
   145  	if il.isClone {
   146  		return nil
   147  	}
   148  	return mmap.Munmap(il.summariesMmap)
   149  }
   150  
   151  // newNearestIndexOffsetLookupFromSummariesFile creates an nearestIndexOffsetLookup
   152  // from an index summaries file by reading the summaries file into an anonymous
   153  // mmap'd region, and also creating the slice of summaries offsets which is
   154  // required to binary search the data structure. It will also make sure that
   155  // the summaries file is sorted (which it always should be).
   156  func newNearestIndexOffsetLookupFromSummariesFile(
   157  	summariesFdWithDigest digest.FdWithDigestReader,
   158  	expectedDigest uint32,
   159  	decoder *xmsgpack.Decoder,
   160  	decoderStream xmsgpack.ByteDecoderStream,
   161  	numEntries int,
   162  	forceMmapMemory bool,
   163  	reporterOptions mmap.ReporterOptions,
   164  ) (*nearestIndexOffsetLookup, error) {
   165  	reporterOptions.Context.Name = mmapPersistFsSummariesFileName
   166  	summariesMmap, err := validateAndMmap(summariesFdWithDigest, expectedDigest, forceMmapMemory, reporterOptions)
   167  	if err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	// Msgpack decode the entire summaries file (we need to store the offsets
   172  	// for the entries so we can binary-search it)
   173  	var (
   174  		summaryTokens = make([]xmsgpack.IndexSummaryToken, 0, numEntries)
   175  		lastReadID    []byte
   176  	)
   177  	decoderStream.Reset(summariesMmap.Bytes)
   178  	decoder.Reset(decoderStream)
   179  
   180  	for read := 0; read < numEntries; read++ {
   181  		// We ignore the entry itself because we don't need any information from it
   182  		entry, summaryToken, err := decoder.DecodeIndexSummary()
   183  		if err != nil {
   184  			mmap.Munmap(summariesMmap)
   185  			return nil, err
   186  		}
   187  
   188  		// Make sure that all the IDs are sorted as we iterate, and return an error
   189  		// if they're not. This should never happen as files should be sorted on disk.
   190  		if lastReadID != nil && bytes.Compare(lastReadID, entry.ID) != -1 {
   191  			mmap.Munmap(summariesMmap)
   192  			return nil, fmt.Errorf("summaries file is not sorted: %s", summariesFdWithDigest.Fd().Name())
   193  		}
   194  		summaryTokens = append(summaryTokens, summaryToken)
   195  		lastReadID = entry.ID
   196  	}
   197  
   198  	return newNearestIndexOffsetLookup(summaryTokens, summariesMmap), nil
   199  }