github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/seek.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package fs

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"time"

	"github.com/m3db/m3/src/dbnode/digest"
	xmsgpack "github.com/m3db/m3/src/dbnode/persist/fs/msgpack"
	"github.com/m3db/m3/src/dbnode/persist/schema"
	"github.com/m3db/m3/src/x/checked"
	xerrors "github.com/m3db/m3/src/x/errors"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/instrument"
	"github.com/m3db/m3/src/x/mmap"
	"github.com/m3db/m3/src/x/pool"
	xtime "github.com/m3db/m3/src/x/time"

	"gopkg.in/vmihailenco/msgpack.v2"
)

var (
	// errSeekIDNotFound is returned when the ID cannot be found in the shard.
	errSeekIDNotFound = errors.New("id not found in shard")

	// errSeekChecksumMismatch is returned when the data checksum does not match
	// the expected checksum.
	errSeekChecksumMismatch = errors.New("checksum does not match expected checksum")

	// errSeekNotCompleted is returned when there is no error but the seek did not complete.
	errSeekNotCompleted = errors.New("seek not completed")

	// errClonesShouldNotBeOpened is returned when Open() is called on a clone.
	errClonesShouldNotBeOpened = errors.New("clone should not be opened")
)

const (
	maxSimpleBytesPoolSliceSize = 4096
	// One for the ID and one for the tags.
	maxSimpleBytesPoolSize = 2
)

type seeker struct {
	opts seekerOpts

	// Data read from the indexInfo file. Note that we use xtime.UnixNano
	// instead of time.Time to avoid keeping an extra pointer around.
	start          xtime.UnixNano
	blockSize      time.Duration
	versionChecker schema.VersionChecker

	dataFd        *os.File
	indexFd       *os.File
	indexFileSize int64

	unreadBuf []byte

	// Bloom filter associated with the shard / block the seeker is responsible
	// for. Needs to be closed when done.
	bloomFilter *ManagedConcurrentBloomFilter
	indexLookup *nearestIndexOffsetLookup

	isClone bool
}

// IndexEntry is an entry from the index file which can be passed to
// SeekByIndexEntry to seek to the data for that entry.
type IndexEntry struct {
	Size         uint32
	DataChecksum uint32
	Offset       int64
	EncodedTags  checked.Bytes
}

// NewSeeker returns a new seeker.
func NewSeeker(
	filePathPrefix string,
	dataBufferSize int,
	infoBufferSize int,
	bytesPool pool.CheckedBytesPool,
	keepUnreadBuf bool,
	opts Options,
) DataFileSetSeeker {
	return newSeeker(seekerOpts{
		filePathPrefix: filePathPrefix,
		dataBufferSize: dataBufferSize,
		infoBufferSize: infoBufferSize,
		bytesPool:      bytesPool,
		keepUnreadBuf:  keepUnreadBuf,
		opts:           opts,
	})
}
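
// Illustrative usage (a sketch, not from the original file; the variable
// values are assumptions): construct a seeker, open one shard/block volume,
// then seek by series ID.
//
//	seeker := NewSeeker(filePathPrefix, dataBufferSize, infoBufferSize,
//		bytesPool, false, opts)
//	resources := NewReusableSeekerResources(opts)
//	if err := seeker.Open(namespace, shard, blockStart, volumeIndex, resources); err != nil {
//		return err
//	}
//	defer seeker.Close()
//	data, err := seeker.SeekByID(ident.StringID("series-id"), resources)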

type seekerOpts struct {
	filePathPrefix string
	infoBufferSize int
	dataBufferSize int
	bytesPool      pool.CheckedBytesPool
	keepUnreadBuf  bool
	opts           Options
}

// fileSetSeeker adds package level access to further methods
// on the seeker for use by the seeker manager for efficient
// multi-seeker use.
type fileSetSeeker interface {
	DataFileSetSeeker

	// unreadBuffer returns the unread buffer
	unreadBuffer() []byte

	// setUnreadBuffer sets the unread buffer
	setUnreadBuffer(buf []byte)
}

func newSeeker(opts seekerOpts) fileSetSeeker {
	return &seeker{
		opts: opts,
	}
}

func (s *seeker) ConcurrentIDBloomFilter() *ManagedConcurrentBloomFilter {
	return s.bloomFilter
}

func (s *seeker) Open(
	namespace ident.ID,
	shard uint32,
	blockStart xtime.UnixNano,
	volumeIndex int,
	resources ReusableSeekerResources,
) error {
	if s.isClone {
		return errClonesShouldNotBeOpened
	}

	shardDir := ShardDataDirPath(s.opts.filePathPrefix, namespace, shard)
	var (
		infoFd, digestFd, bloomFilterFd, summariesFd *os.File
		err                                          error
		isLegacy                                     bool
	)

	if volumeIndex == 0 {
		isLegacy, err = isFirstVolumeLegacy(shardDir, blockStart, CheckpointFileSuffix)
		if err != nil {
			return err
		}
	}

	// Open necessary files
	if err := openFiles(os.Open, map[string]**os.File{
		dataFilesetPathFromTimeAndIndex(shardDir, blockStart, volumeIndex, InfoFileSuffix, isLegacy):        &infoFd,
		dataFilesetPathFromTimeAndIndex(shardDir, blockStart, volumeIndex, indexFileSuffix, isLegacy):       &s.indexFd,
		dataFilesetPathFromTimeAndIndex(shardDir, blockStart, volumeIndex, dataFileSuffix, isLegacy):        &s.dataFd,
		dataFilesetPathFromTimeAndIndex(shardDir, blockStart, volumeIndex, DigestFileSuffix, isLegacy):      &digestFd,
		dataFilesetPathFromTimeAndIndex(shardDir, blockStart, volumeIndex, bloomFilterFileSuffix, isLegacy): &bloomFilterFd,
		dataFilesetPathFromTimeAndIndex(shardDir, blockStart, volumeIndex, summariesFileSuffix, isLegacy):   &summariesFd,
	}); err != nil {
		return err
	}

	var (
		infoFdWithDigest           = resources.seekerOpenResources.infoFDDigestReader
		indexFdWithDigest          = resources.seekerOpenResources.indexFDDigestReader
		bloomFilterFdWithDigest    = resources.seekerOpenResources.bloomFilterFDDigestReader
		summariesFdWithDigest      = resources.seekerOpenResources.summariesFDDigestReader
		digestFdWithDigestContents = resources.seekerOpenResources.digestFDDigestContentsReader
	)
	defer func() {
		// NB(rartoul): We don't need to keep these FDs open as we use them up front.
		infoFdWithDigest.Close()
		bloomFilterFdWithDigest.Close()
		summariesFdWithDigest.Close()
		digestFdWithDigestContents.Close()
	}()

	infoFdWithDigest.Reset(infoFd)
	indexFdWithDigest.Reset(s.indexFd)
	summariesFdWithDigest.Reset(summariesFd)
	digestFdWithDigestContents.Reset(digestFd)

	expectedDigests, err := readFileSetDigests(digestFdWithDigestContents)
	if err != nil {
		// Try to close if failed to read
		s.Close()
		return err
	}

	infoStat, err := infoFd.Stat()
	if err != nil {
		s.Close()
		return err
	}

	info, err := s.readInfo(
		int(infoStat.Size()),
		infoFdWithDigest,
		expectedDigests.infoDigest,
		resources,
	)
	if err != nil {
		s.Close()
		return err
	}
	s.start = xtime.UnixNano(info.BlockStart)
	s.blockSize = time.Duration(info.BlockSize)
	s.versionChecker = schema.NewVersionChecker(int(info.MajorVersion), int(info.MinorVersion))

	err = s.validateIndexFileDigest(
		indexFdWithDigest, expectedDigests.indexDigest)
	if err != nil {
		s.Close()
		return fmt.Errorf(
			"index file digest for file: %s does not match the expected digest: %v",
			filesetPathFromTimeLegacy(shardDir, blockStart, indexFileSuffix), err,
		)
	}

	indexFdStat, err := s.indexFd.Stat()
	if err != nil {
		s.Close()
		return err
	}
	s.indexFileSize = indexFdStat.Size()

	s.bloomFilter, err = newManagedConcurrentBloomFilterFromFile(
		bloomFilterFd,
		bloomFilterFdWithDigest,
		expectedDigests.bloomFilterDigest,
		uint(info.BloomFilter.NumElementsM),
		uint(info.BloomFilter.NumHashesK),
		s.opts.opts.ForceBloomFilterMmapMemory(),
		mmap.ReporterOptions{
			Reporter: s.opts.opts.MmapReporter(),
		},
	)
	if err != nil {
		s.Close()
		return err
	}

	summariesFdWithDigest.Reset(summariesFd)
	s.indexLookup, err = newNearestIndexOffsetLookupFromSummariesFile(
		summariesFdWithDigest,
		expectedDigests.summariesDigest,
		resources.xmsgpackDecoder,
		resources.byteDecoderStream,
		int(info.Summaries.Summaries),
		s.opts.opts.ForceIndexSummariesMmapMemory(),
		mmap.ReporterOptions{
			Reporter: s.opts.opts.MmapReporter(),
		},
	)
	if err != nil {
		s.Close()
		return err
	}

	if !s.opts.keepUnreadBuf {
		// NB(r): Free the unread buffer, since unless this seeker is used via
		// the seeker manager we will never use the buffer again.
		s.unreadBuf = nil
	}

	return err
}

func (s *seeker) prepareUnreadBuf(size int) {
	if len(s.unreadBuf) < size {
		// NB(r): Allocate a little larger than requested so that regrowing is
		// unlikely to happen multiple times.
		s.unreadBuf = make([]byte, int(1.5*float64(size)))
	}
}

func (s *seeker) unreadBuffer() []byte {
	return s.unreadBuf
}

func (s *seeker) setUnreadBuffer(buf []byte) {
	s.unreadBuf = buf
}

func (s *seeker) readInfo(
	size int,
	infoDigestReader digest.FdWithDigestReader,
	expectedInfoDigest uint32,
	resources ReusableSeekerResources,
) (schema.IndexInfo, error) {
	s.prepareUnreadBuf(size)
	n, err := infoDigestReader.ReadAllAndValidate(s.unreadBuf[:size], expectedInfoDigest)
	if err != nil {
		return schema.IndexInfo{}, err
	}

	resources.xmsgpackDecoder.Reset(xmsgpack.NewByteDecoderStream(s.unreadBuf[:n]))
	return resources.xmsgpackDecoder.DecodeIndexInfo()
}

// SeekByID returns the data for the specified ID. An error will be returned if the
// ID cannot be found.
func (s *seeker) SeekByID(id ident.ID, resources ReusableSeekerResources) (checked.Bytes, error) {
	entry, err := s.SeekIndexEntry(id, resources)
	if err != nil {
		return nil, err
	}

	return s.SeekByIndexEntry(entry, resources)
}

// SeekByIndexEntry is similar to SeekByID, but uses the provided IndexEntry
// instead of looking it up on its own. Useful in cases where you've already
// obtained an entry and don't want to waste resources looking it up again.
func (s *seeker) SeekByIndexEntry(
	entry IndexEntry,
	resources ReusableSeekerResources,
) (checked.Bytes, error) {
	resources.offsetFileReader.reset(s.dataFd, entry.Offset)

	// Obtain an appropriately sized buffer.
	var buffer checked.Bytes
	if s.opts.bytesPool != nil {
		buffer = s.opts.bytesPool.Get(int(entry.Size))
		buffer.IncRef()
		defer buffer.DecRef()
		buffer.Resize(int(entry.Size))
	} else {
		buffer = checked.NewBytes(make([]byte, entry.Size), nil)
		buffer.IncRef()
		defer buffer.DecRef()
	}

	// Copy the actual data into the underlying buffer.
	underlyingBuf := buffer.Bytes()
	n, err := io.ReadFull(resources.offsetFileReader, underlyingBuf)
	if err != nil {
		return nil, err
	}
	if n != int(entry.Size) {
		// This check is redundant because io.ReadFull will return an error if
		// it's not able to read the specified number of bytes, but we keep it
		// in for posterity.
		return nil, fmt.Errorf("tried to read: %d bytes but read: %d", entry.Size, n)
	}

	// NB(r): We _must_ check the checksum against the known checksum, as the
	// data file might not have been verified if we haven't read through it yet.
	if entry.DataChecksum != digest.Checksum(underlyingBuf) {
		return nil, errSeekChecksumMismatch
	}

	return buffer, nil
}

// SeekIndexEntry performs the following steps:
//
//     1. Go to the indexLookup and it will give us an offset that is a good starting
//        point for scanning the index file.
//     2. Reset an offsetFileReader with the index fd and an offset (so that calls to Read() will
//        begin at the offset provided by the offset lookup).
//     3. Reset a decoder with fileDecoderStream (offsetFileReader wrapped in a bufio.Reader).
//     4. Call DecodeIndexEntry in a tight loop (which will advance our position in the
//        offsetFileReader internally) until we've either found the entry we're looking for or gone so
//        far we know it does not exist.
func (s *seeker) SeekIndexEntry(
	id ident.ID,
	resources ReusableSeekerResources,
) (IndexEntry, error) {
	offset, err := s.indexLookup.getNearestIndexFileOffset(id, resources)
	// Should never happen, either something is really wrong with the code or
	// the file on disk was corrupted.
	if err != nil {
		return IndexEntry{}, err
	}

	resources.offsetFileReader.reset(s.indexFd, offset)
	resources.fileDecoderStream.Reset(resources.offsetFileReader)
	resources.xmsgpackDecoder.Reset(resources.fileDecoderStream)

	idBytes := id.Bytes()
	for {
		// Use the bytesPool on resources here because it's designed for this express purpose
		// and is much faster / cheaper than the checked bytes pool, which has a lot of
		// synchronization and is prone to allocation (due to being shared). Because
		// this is a tight loop (scanning linearly through the index file) we want to use a
		// very cheap pool until we find what we're looking for, and then we can perform a single
		// copy into checked.Bytes from the more expensive pool.
		entry, err := resources.xmsgpackDecoder.DecodeIndexEntry(resources.decodeIndexEntryBytesPool)
		if err == io.EOF {
			// We reached the end of the file without finding it.
			return IndexEntry{}, errSeekIDNotFound
		}
		if err != nil {
			// Should never happen, either something is really wrong with the code or
			// the file on disk was corrupted.
			return IndexEntry{}, instrument.InvariantErrorf(err.Error())
		}
		if entry.ID == nil {
			// Should never happen, either something is really wrong with the code or
			// the file on disk was corrupted.
			return IndexEntry{},
				instrument.InvariantErrorf("decoded index entry had no ID for: %s", id.String())
		}

		comparison := bytes.Compare(entry.ID, idBytes)
		if comparison == 0 {
			// If it's a match, we need to copy the tags into a checked bytes
			// so they can be passed along. We use the "real" bytes pool here
			// because we're passing ownership of the bytes to the entry / caller.
			var checkedEncodedTags checked.Bytes
			if len(entry.EncodedTags) > 0 {
				checkedEncodedTags = s.opts.bytesPool.Get(len(entry.EncodedTags))
				checkedEncodedTags.IncRef()
				checkedEncodedTags.AppendAll(entry.EncodedTags)
			}

			indexEntry := IndexEntry{
				Size:         uint32(entry.Size),
				DataChecksum: uint32(entry.DataChecksum),
				Offset:       entry.Offset,
				EncodedTags:  checkedEncodedTags,
			}

			// Safe to return resources to the pool because ID will not be
			// passed along and tags have been copied.
			resources.decodeIndexEntryBytesPool.Put(entry.ID)
			resources.decodeIndexEntryBytesPool.Put(entry.EncodedTags)

			return indexEntry, nil
		}

		// No longer being used so we can return to the pool.
		resources.decodeIndexEntryBytesPool.Put(entry.ID)
		resources.decodeIndexEntryBytesPool.Put(entry.EncodedTags)

		// We've scanned far enough through the index file to be sure that the ID
		// we're looking for doesn't exist (because the index is sorted by ID)
		if comparison == 1 {
			return IndexEntry{}, errSeekIDNotFound
		}
	}
}
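
// For example (illustrative, not part of the original file): when seeking to
// ID "m", the summaries lookup might return the offset of the entry for "k".
// The loop then decodes entries "k", "l", "m" in order; "k" and "l" compare
// less than "m" so scanning continues, and "m" compares equal so its entry is
// returned. Had the file instead contained "k", "l", "n", the comparison for
// "n" would be greater than zero and the scan would stop with
// errSeekIDNotFound, since the sorted index guarantees "m" cannot appear later.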

func (s *seeker) Range() xtime.Range {
	return xtime.Range{Start: s.start, End: s.start.Add(s.blockSize)}
}

func (s *seeker) Close() error {
	// Parent should handle cleaning up shared resources.
	if s.isClone {
		return nil
	}

	multiErr := xerrors.NewMultiError()
	if s.bloomFilter != nil {
		multiErr = multiErr.Add(s.bloomFilter.Close())
		s.bloomFilter = nil
	}
	if s.indexLookup != nil {
		multiErr = multiErr.Add(s.indexLookup.close())
		s.indexLookup = nil
	}
	if s.indexFd != nil {
		multiErr = multiErr.Add(s.indexFd.Close())
		s.indexFd = nil
	}
	if s.dataFd != nil {
		multiErr = multiErr.Add(s.dataFd.Close())
		s.dataFd = nil
	}
	return multiErr.FinalError()
}

func (s *seeker) ConcurrentClone() (ConcurrentDataFileSetSeeker, error) {
	// IndexLookup is not concurrency safe, but a parent and its clone can
	// safely be used concurrently.
	indexLookupClone, err := s.indexLookup.concurrentClone()
	if err != nil {
		return nil, err
	}

	seeker := &seeker{
		opts:          s.opts,
		indexFileSize: s.indexFileSize,
		// BloomFilter is concurrency safe.
		bloomFilter: s.bloomFilter,
		indexLookup: indexLookupClone,
		isClone:     true,

		// Index and data fds are always accessed via the ReadAt() / pread APIs so
		// they are concurrency safe and can be shared among clones.
		indexFd: s.indexFd,
		dataFd:  s.dataFd,

		versionChecker: s.versionChecker,
	}

	return seeker, nil
}
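
// A typical pattern (an illustrative sketch, not lifted from this repository)
// is one clone per goroutine, with the parent closed last:
//
//	clone, err := parent.ConcurrentClone()
//	if err != nil {
//		return err
//	}
//	go func() {
//		defer clone.Close() // clones skip shared resources on Close
//		entry, err := clone.SeekIndexEntry(id, resources)
//		// ...
//	}()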

func (s *seeker) validateIndexFileDigest(
	indexFdWithDigest digest.FdWithDigestReader,
	expectedDigest uint32,
) error {
	// If piecemeal checksum validation is enabled for index entries, do not
	// attempt to validate the checksum of the entire file.
	if s.versionChecker.IndexEntryValidationEnabled() {
		return nil
	}

	buf := make([]byte, s.opts.dataBufferSize)
	for {
		n, err := indexFdWithDigest.Read(buf)
		if err != nil && err != io.EOF {
			return fmt.Errorf("error reading index file: %v", err)
		}
		if n == 0 || err == io.EOF {
			break
		}
	}
	return indexFdWithDigest.Validate(expectedDigest)
}

// ReusableSeekerResources is a collection of reusable resources
// that the seeker requires for seeking. It can be pooled by callers
// using the seeker so that expensive resources don't need to be
// maintained for each seeker, especially when only a few are generally
// being used at a time due to the FetchConcurrency.
type ReusableSeekerResources struct {
	msgpackDecoder    *msgpack.Decoder
	xmsgpackDecoder   *xmsgpack.Decoder
	fileDecoderStream *bufio.Reader
	byteDecoderStream xmsgpack.ByteDecoderStream
	offsetFileReader  *offsetFileReader
	// This pool should only be used for calling DecodeIndexEntry. We use a
	// special pool here to avoid the overhead of channel synchronization, as
	// well as ref counting that comes with the checked bytes pool. In addition,
	// since the ReusableSeekerResources is only ever used by a single seeker at
	// a time, we can size this pool such that it almost never has to allocate.
	decodeIndexEntryBytesPool pool.BytesPool

	seekerOpenResources reusableSeekerOpenResources
}

// reusableSeekerOpenResources contains resources used for the Open() method of the seeker.
type reusableSeekerOpenResources struct {
	infoFDDigestReader           digest.FdWithDigestReader
	indexFDDigestReader          digest.FdWithDigestReader
	bloomFilterFDDigestReader    digest.FdWithDigestReader
	summariesFDDigestReader      digest.FdWithDigestReader
	digestFDDigestContentsReader digest.FdWithDigestContentsReader
}

func newReusableSeekerOpenResources(opts Options) reusableSeekerOpenResources {
	return reusableSeekerOpenResources{
		infoFDDigestReader:           digest.NewFdWithDigestReader(opts.InfoReaderBufferSize()),
		indexFDDigestReader:          digest.NewFdWithDigestReader(opts.DataReaderBufferSize()),
		bloomFilterFDDigestReader:    digest.NewFdWithDigestReader(opts.DataReaderBufferSize()),
		summariesFDDigestReader:      digest.NewFdWithDigestReader(opts.DataReaderBufferSize()),
		digestFDDigestContentsReader: digest.NewFdWithDigestContentsReader(opts.InfoReaderBufferSize()),
	}
}

// NewReusableSeekerResources creates a new ReusableSeekerResources.
func NewReusableSeekerResources(opts Options) ReusableSeekerResources {
	seekReaderSize := opts.SeekReaderBufferSize()
	return ReusableSeekerResources{
		msgpackDecoder:            msgpack.NewDecoder(nil),
		xmsgpackDecoder:           xmsgpack.NewDecoder(opts.DecodingOptions()),
		fileDecoderStream:         bufio.NewReaderSize(nil, seekReaderSize),
		byteDecoderStream:         xmsgpack.NewByteDecoderStream(nil),
		offsetFileReader:          newOffsetFileReader(),
		decodeIndexEntryBytesPool: newSimpleBytesPool(),
		seekerOpenResources:       newReusableSeekerOpenResources(opts),
	}
}
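
// Since ReusableSeekerResources is only safe for one seeker at a time, callers
// that fan out across goroutines might keep one per worker, e.g. via a
// sync.Pool (an illustrative sketch, not how this package mandates pooling;
// `opts` is assumed to be in scope):
//
//	var resourcesPool = sync.Pool{
//		New: func() interface{} {
//			return NewReusableSeekerResources(opts)
//		},
//	}
//
//	resources := resourcesPool.Get().(ReusableSeekerResources)
//	defer resourcesPool.Put(resources)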

type simpleBytesPool struct {
	pool             [][]byte
	maxByteSliceSize int
	maxPoolSize      int
}

func newSimpleBytesPool() pool.BytesPool {
	s := &simpleBytesPool{
		maxByteSliceSize: maxSimpleBytesPoolSliceSize,
		maxPoolSize:      maxSimpleBytesPoolSize,
	}
	s.Init()
	return s
}

func (s *simpleBytesPool) Init() {
	for i := 0; i < s.maxPoolSize; i++ {
		s.pool = append(s.pool, make([]byte, 0, s.maxByteSliceSize))
	}
}

func (s *simpleBytesPool) Get(capacity int) []byte {
	if len(s.pool) == 0 {
		return make([]byte, 0, capacity)
	}

	lastIdx := len(s.pool) - 1
	b := s.pool[lastIdx]

	if cap(b) >= capacity {
		// If the slice has enough capacity, remove it from the
		// pool and return it to the caller.
		s.pool = s.pool[:lastIdx]
		return b
	}

	return make([]byte, 0, capacity)
}

func (s *simpleBytesPool) Put(b []byte) {
	if b == nil ||
		len(s.pool) >= s.maxPoolSize ||
		cap(b) > s.maxByteSliceSize {
		return
	}

	// Reset length to zero so Get always hands out empty slices, matching
	// the slices it allocates itself.
	s.pool = append(s.pool, b[:0])
}
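
// Illustrative usage (not part of the original file): the pool hands out
// zero-length slices with at least the requested capacity, so callers append
// into them and Put them back when finished:
//
//	buf := p.Get(64)          // len(buf) == 0, cap(buf) >= 64
//	buf = append(buf, id...)  // fill via append
//	p.Put(buf)                // return for reuse once no longer referenced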

var _ io.Reader = &offsetFileReader{}

// offsetFileReader implements io.Reader and allows an *os.File to be wrapped
// such that any calls to Read() are issued at the provided offset. This is used
// to issue reads to specific portions of the index and data files without having
// to first call Seek(). This reduces the number of syscalls that need to be made
// and also allows the fds to be shared among concurrent goroutines, since the
// kernel-managed file descriptor offset is never used.
type offsetFileReader struct {
	fd     *os.File
	offset int64
}

func newOffsetFileReader() *offsetFileReader {
	return &offsetFileReader{}
}

func (p *offsetFileReader) Read(b []byte) (n int, err error) {
	n, err = p.fd.ReadAt(b, p.offset)
	p.offset += int64(n)
	return n, err
}

func (p *offsetFileReader) reset(fd *os.File, offset int64) {
	p.fd = fd
	p.offset = offset
}
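
// For example (an illustrative sketch, not from the original file): two
// goroutines can read disjoint regions of the same *os.File concurrently, each
// through its own offsetFileReader, because ReadAt never touches the shared
// kernel offset:
//
//	r1, r2 := newOffsetFileReader(), newOffsetFileReader()
//	r1.reset(fd, 0)    // reads bytes starting at offset 0
//	r2.reset(fd, 4096) // reads bytes starting at offset 4096
//	go io.ReadFull(r1, buf1)
//	go io.ReadFull(r2, buf2)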