github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/index_read.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fs
    22  
    23  import (
    24  	"bytes"
    25  	"fmt"
    26  	"io"
    27  	"io/ioutil"
    28  	"os"
    29  
    30  	"github.com/m3db/m3/src/dbnode/digest"
    31  	"github.com/m3db/m3/src/dbnode/generated/proto/index"
    32  	"github.com/m3db/m3/src/dbnode/persist"
    33  	idxpersist "github.com/m3db/m3/src/m3ninx/persist"
    34  	"github.com/m3db/m3/src/x/mmap"
    35  	xtime "github.com/m3db/m3/src/x/time"
    36  
    37  	"go.uber.org/zap"
    38  )
    39  
    40  const (
    41  	mmapPersistFsIndexName = "mmap.persist.fs.index"
    42  )
    43  
// indexReader reads an index fileset volume (checkpoint, digests, info and
// segment files) from disk and tracks the checksums needed to validate it.
type indexReader struct {
	// Configuration derived from the options passed to reset.
	opts           Options
	filePathPrefix string
	hugePagesOpts  mmap.HugeTLBOptions
	logger         *zap.Logger

	// Identity of the volume being read, populated by Open.
	namespaceDir string
	start        xtime.UnixNano
	fileSetType  persist.FileSetType
	volumeIndex  int

	// currIdx is the index of the next segment ReadSegmentFileSet will read.
	currIdx int
	// info is the unmarshalled contents of the info file.
	info index.IndexVolumeInfo
	// expectedDigest is the unmarshalled contents of the digests file.
	expectedDigest index.IndexDigests
	// expectedDigestOfDigest is the digest read from the checkpoint file; the
	// checksum of the digests file is compared against it.
	expectedDigestOfDigest uint32
	// readDigests holds the checksums computed from the files actually read.
	readDigests indexReaderReadDigests
}
    61  
// indexReaderReadDigests collects the checksums computed while reading the
// info file, the digests file and each segment read so far.
type indexReaderReadDigests struct {
	infoFileDigest    uint32
	digestsFileDigest uint32
	segments          []indexReaderReadSegmentDigests
}
    67  
// indexReaderReadSegmentDigests holds the checksums computed for the files of
// a single segment, along with that segment's type.
type indexReaderReadSegmentDigests struct {
	segmentType idxpersist.IndexSegmentType
	files       []indexReaderReadSegmentFileDigest
}
    72  
// indexReaderReadSegmentFileDigest is the checksum computed for one segment
// file, tagged with the segment file type it was computed for.
type indexReaderReadSegmentFileDigest struct {
	segmentFileType idxpersist.IndexSegmentFileType
	digest          uint32
}
    77  
    78  // NewIndexReader returns a new index reader with options.
    79  func NewIndexReader(opts Options) (IndexFileSetReader, error) {
    80  	if err := opts.Validate(); err != nil {
    81  		return nil, err
    82  	}
    83  	r := new(indexReader)
    84  	r.reset(opts)
    85  	return r, nil
    86  }
    87  
    88  func (r *indexReader) reset(opts Options) {
    89  	*r = indexReader{}
    90  	r.opts = opts
    91  	r.filePathPrefix = opts.FilePathPrefix()
    92  	r.hugePagesOpts = mmap.HugeTLBOptions{
    93  		Enabled:   opts.MmapEnableHugeTLB(),
    94  		Threshold: opts.MmapHugeTLBThreshold(),
    95  	}
    96  	r.logger = opts.InstrumentOptions().Logger()
    97  }
    98  
    99  func (r *indexReader) Open(
   100  	opts IndexReaderOpenOptions,
   101  ) (IndexReaderOpenResult, error) {
   102  	var result IndexReaderOpenResult
   103  
   104  	// NB(r): so the reader can be reused.
   105  	r.reset(r.opts)
   106  
   107  	var (
   108  		namespace          = opts.Identifier.Namespace
   109  		checkpointFilepath string
   110  		infoFilepath       string
   111  		digestFilepath     string
   112  	)
   113  	r.start = opts.Identifier.BlockStart
   114  	r.fileSetType = opts.FileSetType
   115  	r.volumeIndex = opts.Identifier.VolumeIndex
   116  	switch opts.FileSetType {
   117  	case persist.FileSetSnapshotType:
   118  		r.namespaceDir = NamespaceIndexSnapshotDirPath(r.filePathPrefix, namespace)
   119  	case persist.FileSetFlushType:
   120  		r.namespaceDir = NamespaceIndexDataDirPath(r.filePathPrefix, namespace)
   121  	default:
   122  		return result, fmt.Errorf("cannot open index reader for fileset type: %s", opts.FileSetType)
   123  	}
   124  	checkpointFilepath = FilesetPathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex, CheckpointFileSuffix)
   125  	infoFilepath = FilesetPathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex, InfoFileSuffix)
   126  	digestFilepath = FilesetPathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex, DigestFileSuffix)
   127  
   128  	// If there is no checkpoint file, don't read the index files.
   129  	if err := r.readCheckpointFile(checkpointFilepath); err != nil {
   130  		return result, err
   131  	}
   132  	if err := r.readDigestsFile(digestFilepath); err != nil {
   133  		return result, err
   134  	}
   135  	if err := r.readInfoFile(infoFilepath); err != nil {
   136  		return result, err
   137  	}
   138  	result.Shards = make(map[uint32]struct{}, len(r.info.Shards))
   139  	for _, shard := range r.info.Shards {
   140  		result.Shards[shard] = struct{}{}
   141  	}
   142  	return result, nil
   143  }
   144  
   145  func (r *indexReader) readCheckpointFile(filePath string) error {
   146  	exists, err := CompleteCheckpointFileExists(filePath)
   147  	if err != nil {
   148  		return err
   149  	}
   150  	if !exists {
   151  		return ErrCheckpointFileNotFound
   152  	}
   153  	data, err := ioutil.ReadFile(filePath)
   154  	if err != nil {
   155  		return err
   156  	}
   157  	r.expectedDigestOfDigest = digest.Buffer(data).ReadDigest()
   158  	return nil
   159  }
   160  
   161  func (r *indexReader) readDigestsFile(filePath string) error {
   162  	data, err := ioutil.ReadFile(filePath)
   163  	if err != nil {
   164  		return err
   165  	}
   166  	r.readDigests.digestsFileDigest = digest.Checksum(data)
   167  	if err := r.validateDigestsFileDigest(); err != nil {
   168  		return err
   169  	}
   170  	return r.expectedDigest.Unmarshal(data)
   171  }
   172  
   173  func (r *indexReader) readInfoFile(filePath string) error {
   174  	data, err := ioutil.ReadFile(filePath)
   175  	if err != nil {
   176  		return err
   177  	}
   178  	r.readDigests.infoFileDigest = digest.Checksum(data)
   179  	if r.readDigests.infoFileDigest != r.expectedDigest.InfoDigest {
   180  		return fmt.Errorf("read info file checksum bad: expected=%d, actual=%d",
   181  			r.expectedDigest.InfoDigest, r.readDigests.infoFileDigest)
   182  	}
   183  	return r.info.Unmarshal(data)
   184  }
   185  
   186  func (r *indexReader) SegmentFileSets() int {
   187  	return len(r.info.Segments)
   188  }
   189  
   190  func (r *indexReader) ReadSegmentFileSet() (
   191  	idxpersist.IndexSegmentFileSet,
   192  	error,
   193  ) {
   194  	if r.currIdx >= len(r.info.Segments) {
   195  		return nil, io.EOF
   196  	}
   197  
   198  	var (
   199  		segment = r.info.Segments[r.currIdx]
   200  		result  = readableIndexSegmentFileSet{
   201  			info:  segment,
   202  			files: make([]idxpersist.IndexSegmentFile, 0, len(segment.Files)),
   203  		}
   204  		digests = indexReaderReadSegmentDigests{
   205  			segmentType: idxpersist.IndexSegmentType(segment.SegmentType),
   206  		}
   207  	)
   208  	success := false
   209  	defer func() {
   210  		// Do not close opened files if read finishes successfully.
   211  		if success {
   212  			return
   213  		}
   214  		for _, file := range result.files {
   215  			file.Close()
   216  		}
   217  	}()
   218  	for _, file := range segment.Files {
   219  		segFileType := idxpersist.IndexSegmentFileType(file.SegmentFileType)
   220  
   221  		var filePath string
   222  		switch r.fileSetType {
   223  		case persist.FileSetSnapshotType:
   224  			filePath = snapshotIndexSegmentFilePathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex,
   225  				r.currIdx, segFileType)
   226  		case persist.FileSetFlushType:
   227  			filePath = filesetIndexSegmentFilePathFromTime(r.namespaceDir, r.start, r.volumeIndex,
   228  				r.currIdx, segFileType)
   229  		default:
   230  			return nil, fmt.Errorf("unknown fileset type: %s", r.fileSetType)
   231  		}
   232  
   233  		var (
   234  			fd   *os.File
   235  			desc mmap.Descriptor
   236  		)
   237  		mmapResult, err := mmap.Files(os.Open, map[string]mmap.FileDesc{
   238  			filePath: {
   239  				File:       &fd,
   240  				Descriptor: &desc,
   241  				Options: mmap.Options{
   242  					Read:    true,
   243  					HugeTLB: r.hugePagesOpts,
   244  					ReporterOptions: mmap.ReporterOptions{
   245  						Context: mmap.Context{
   246  							Name: mmapPersistFsIndexName,
   247  						},
   248  						Reporter: r.opts.MmapReporter(),
   249  					},
   250  				},
   251  			},
   252  		})
   253  		if err != nil {
   254  			return nil, err
   255  		}
   256  		if warning := mmapResult.Warning; warning != nil {
   257  			r.logger.Warn("warning while mmapping files in reader", zap.Error(warning))
   258  		}
   259  
   260  		file := newReadableIndexSegmentFileMmap(segFileType, fd, desc)
   261  		result.files = append(result.files, file)
   262  
   263  		if r.opts.IndexReaderAutovalidateIndexSegments() {
   264  			// Only checksum the file if we are autovalidating the index
   265  			// segments on open.
   266  			digests.files = append(digests.files, indexReaderReadSegmentFileDigest{
   267  				segmentFileType: segFileType,
   268  				digest:          digest.Checksum(desc.Bytes),
   269  			})
   270  		}
   271  
   272  		// NB(bodu): Free mmaped bytes after we take the checksum so we don't
   273  		// get memory spikes at bootstrap time.
   274  		if err := mmap.MadviseDontNeed(desc); err != nil {
   275  			return nil, err
   276  		}
   277  	}
   278  
   279  	r.currIdx++
   280  	r.readDigests.segments = append(r.readDigests.segments, digests)
   281  	success = true
   282  	return result, nil
   283  }
   284  
   285  func (r *indexReader) Validate() error {
   286  	if err := r.validateDigestsFileDigest(); err != nil {
   287  		return err
   288  	}
   289  	if err := r.validateInfoFileDigest(); err != nil {
   290  		return err
   291  	}
   292  	if !r.opts.IndexReaderAutovalidateIndexSegments() {
   293  		// Do not validate on segment open.
   294  		return nil
   295  	}
   296  	for i, segment := range r.info.Segments {
   297  		for j := range segment.Files {
   298  			if err := r.validateSegmentFileDigest(i, j); err != nil {
   299  				return err
   300  			}
   301  		}
   302  	}
   303  	return nil
   304  }
   305  
   306  func (r *indexReader) validateDigestsFileDigest() error {
   307  	if r.readDigests.digestsFileDigest != r.expectedDigestOfDigest {
   308  		return fmt.Errorf("read digests file checksum bad: expected=%d, actual=%d",
   309  			r.expectedDigestOfDigest, r.readDigests.digestsFileDigest)
   310  	}
   311  	return nil
   312  }
   313  
   314  func (r *indexReader) validateInfoFileDigest() error {
   315  	if r.readDigests.infoFileDigest != r.expectedDigest.InfoDigest {
   316  		return fmt.Errorf("read info file checksum bad: expected=%d, actual=%d",
   317  			r.expectedDigest.InfoDigest, r.readDigests.infoFileDigest)
   318  	}
   319  	return nil
   320  }
   321  
   322  func (r *indexReader) validateSegmentFileDigest(segmentIdx, fileIdx int) error {
   323  	if segmentIdx >= len(r.readDigests.segments) {
   324  		return fmt.Errorf(
   325  			"have not read correct number of segments to validate segment %d checksums: "+
   326  				"need=%d, actual=%d",
   327  			segmentIdx, segmentIdx+1, len(r.readDigests.segments))
   328  	}
   329  	if segmentIdx >= len(r.expectedDigest.SegmentDigests) {
   330  		return fmt.Errorf(
   331  			"have not read digest files correctly to validate segment %d checksums: "+
   332  				"need=%d, actual=%d",
   333  			segmentIdx, segmentIdx+1, len(r.expectedDigest.SegmentDigests))
   334  	}
   335  
   336  	if fileIdx >= len(r.readDigests.segments[segmentIdx].files) {
   337  		return fmt.Errorf(
   338  			"have not read correct number of segment files to validate segment %d checksums: "+
   339  				"need=%d, actual=%d",
   340  			segmentIdx, fileIdx+1, len(r.readDigests.segments[segmentIdx].files))
   341  	}
   342  	if fileIdx >= len(r.expectedDigest.SegmentDigests[segmentIdx].Files) {
   343  		return fmt.Errorf(
   344  			"have not read correct number of segment files to validate segment %d checksums: "+
   345  				"need=%d, actual=%d",
   346  			segmentIdx, fileIdx+1, len(r.expectedDigest.SegmentDigests[segmentIdx].Files))
   347  	}
   348  
   349  	expected := r.expectedDigest.SegmentDigests[segmentIdx].Files[fileIdx].Digest
   350  	actual := r.readDigests.segments[segmentIdx].files[fileIdx].digest
   351  	if actual != expected {
   352  		return fmt.Errorf("read segment file %d for segment %d checksum bad: expected=%d, actual=%d",
   353  			segmentIdx, fileIdx, expected, actual)
   354  	}
   355  	return nil
   356  }
   357  
   358  func (r *indexReader) IndexVolumeType() idxpersist.IndexVolumeType {
   359  	if r.info.IndexVolumeType == nil {
   360  		return idxpersist.DefaultIndexVolumeType
   361  	}
   362  	return idxpersist.IndexVolumeType(r.info.IndexVolumeType.Value)
   363  }
   364  
   365  func (r *indexReader) Close() error {
   366  	r.reset(r.opts)
   367  	return nil
   368  }
   369  
// NB(r): compile-time assertion that readableIndexSegmentFileSet implements
// the IndexSegmentFileSet interface.
var _ IndexSegmentFileSet = readableIndexSegmentFileSet{}
   372  
// readableIndexSegmentFileSet is the segment file set returned by
// indexReader.ReadSegmentFileSet: the segment's info entry from the info file
// together with the opened files belonging to that segment.
type readableIndexSegmentFileSet struct {
	info  *index.SegmentInfo
	files []idxpersist.IndexSegmentFile
}
   377  
   378  func (s readableIndexSegmentFileSet) SegmentType() idxpersist.IndexSegmentType {
   379  	return idxpersist.IndexSegmentType(s.info.SegmentType)
   380  }
   381  
   382  func (s readableIndexSegmentFileSet) MajorVersion() int {
   383  	return int(s.info.MajorVersion)
   384  }
   385  
   386  func (s readableIndexSegmentFileSet) MinorVersion() int {
   387  	return int(s.info.MinorVersion)
   388  }
   389  
   390  func (s readableIndexSegmentFileSet) SegmentMetadata() []byte {
   391  	return s.info.Metadata
   392  }
   393  
   394  func (s readableIndexSegmentFileSet) Files() []idxpersist.IndexSegmentFile {
   395  	return s.files
   396  }
   397  
// readableIndexSegmentFileMmap is a segment file backed by an open file
// descriptor and an mmap of its contents.
type readableIndexSegmentFileMmap struct {
	fileType idxpersist.IndexSegmentFileType
	// fd is the open file backing the mapping; closed by Close.
	fd *os.File
	// bytesMmap is the mmap descriptor for the file; unmapped by Close.
	bytesMmap mmap.Descriptor
	// reader serves Read calls from the mmapped bytes.
	reader bytes.Reader
}
   404  
   405  func newReadableIndexSegmentFileMmap(
   406  	fileType idxpersist.IndexSegmentFileType,
   407  	fd *os.File,
   408  	bytesMmap mmap.Descriptor,
   409  ) idxpersist.IndexSegmentFile {
   410  	r := &readableIndexSegmentFileMmap{
   411  		fileType:  fileType,
   412  		fd:        fd,
   413  		bytesMmap: bytesMmap,
   414  	}
   415  	r.reader.Reset(r.bytesMmap.Bytes)
   416  	return r
   417  }
   418  
   419  func (f *readableIndexSegmentFileMmap) SegmentFileType() idxpersist.IndexSegmentFileType {
   420  	return f.fileType
   421  }
   422  
   423  func (f *readableIndexSegmentFileMmap) Mmap() (mmap.Descriptor, error) {
   424  	return f.bytesMmap, nil
   425  }
   426  
   427  func (f *readableIndexSegmentFileMmap) Read(b []byte) (int, error) {
   428  	return f.reader.Read(b)
   429  }
   430  
   431  func (f *readableIndexSegmentFileMmap) Close() error {
   432  	// Be sure to close the mmap before the file
   433  	if f.bytesMmap.Bytes != nil {
   434  		if err := mmap.Munmap(f.bytesMmap); err != nil {
   435  			return err
   436  		}
   437  		f.bytesMmap = mmap.Descriptor{}
   438  	}
   439  	if f.fd != nil {
   440  		if err := f.fd.Close(); err != nil {
   441  			return err
   442  		}
   443  		f.fd = nil
   444  	}
   445  	f.reader.Reset(nil)
   446  	return nil
   447  }