github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/namespace_readers.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package storage

import (
	"sync"

	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/sharding"
	"github.com/m3db/m3/src/dbnode/storage/block"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/pool"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/uber-go/tally"
	"go.uber.org/zap"
)

// namespaceReaderManager maintains a pool of closed readers which can be
// re-used (to prevent additional allocations), as well as a cache of recently
// used open readers based on their position. The cache of recently used open
// readers is useful during peer bootstrapping because a pageToken (which
// contains an offset into the reader for both the data and metadata portions
// of the fileset) is used to communicate the client's current position to the
// server.
// In the general case, the client will miss on its first request for a given
// shard/block start, and then experience a cache hit on every subsequent
// request because the current client implementation does not perform any
// parallel requests for a single shard.
// The closedReaders pool is modeled as a stack (implemented via slice
// operations) and the open readers cache is implemented as a map where the
// key is of type cachedOpenReaderKey.
// The namespaceReaderManager also implements a tick() method which should
// be called regularly in order to shrink the closedReaders stack after bursts
// of usage, as well as to expire cached open readers which have not been used
// for a configurable number of ticks.
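//
// As an illustrative sketch only (the caller shown here is hypothetical and
// not part of this file), a peer bootstrap read for a shard/block interacts
// with the manager roughly as follows:
//
//	reader, err := mgr.get(shard, blockStart, position) // cache hit or miss
//	if err != nil {
//		return err
//	}
//	// ... read series data/metadata from reader, advancing its position ...
//	if err := mgr.put(reader); err != nil { // re-cache at the new position
//		return err
//	}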

const (
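	// expireCachedReadersAfterNumTicks is the number of ticks a cached
	// reader (open or closed) may remain unused before it is expired by
	// the tick process.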
	expireCachedReadersAfterNumTicks = 2
)

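// databaseNamespaceReaderManager provides cached fileset readers for a
// namespace, keyed by shard, block start and reader position.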
type databaseNamespaceReaderManager interface {
	filesetExistsAt(
		shard uint32,
		blockStart xtime.UnixNano,
	) (bool, error)

	get(
		shard uint32,
		blockStart xtime.UnixNano,
		position readerPosition,
	) (fs.DataFileSetReader, error)

	put(reader fs.DataFileSetReader) error

	latestVolume(shard uint32, blockStart xtime.UnixNano) (int, error)

	assignShardSet(shardSet sharding.ShardSet)

	tick()

	close()
}

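// fsFileSetExistsFn checks whether a data fileset exists on disk for the
// given namespace, shard, block start and volume.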
type fsFileSetExistsFn func(
	prefix string,
	namespace ident.ID,
	shard uint32,
	blockStart xtime.UnixNano,
	volume int,
) (bool, error)

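// fsNewReaderFn constructs a new data fileset reader.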
type fsNewReaderFn func(
	bytesPool pool.CheckedBytesPool,
	opts fs.Options,
) (fs.DataFileSetReader, error)

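// namespaceReaderManager implements databaseNamespaceReaderManager; see the
// comment block at the top of this file for a description of its caching
// behavior.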
type namespaceReaderManager struct {
	sync.Mutex

	filesetExistsFn fsFileSetExistsFn
	newReaderFn     fsNewReaderFn

	namespace         namespace.Metadata
	fsOpts            fs.Options
	blockLeaseManager block.LeaseManager
	bytesPool         pool.CheckedBytesPool

	logger *zap.Logger

	closedReaders []cachedReader
	openReaders   map[cachedOpenReaderKey]cachedReader
	shardSet      sharding.ShardSet

	metrics namespaceReaderManagerMetrics
}

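// cachedOpenReaderKey uniquely identifies an open reader by the shard,
// block start and exact position it is currently at.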
type cachedOpenReaderKey struct {
	shard      uint32
	blockStart xtime.UnixNano
	position   readerPosition
}

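// readerPosition describes how far through a fileset volume a reader has
// progressed, in terms of data and metadata entries already read.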
type readerPosition struct {
	volume      int
	dataIdx     int
	metadataIdx int
}

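// cachedReader wraps a reader together with the number of ticks since it
// was last used, which drives expiry.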
type cachedReader struct {
	reader         fs.DataFileSetReader
	ticksSinceUsed int
}

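// namespaceReaderManagerMetrics tracks cache hits and the two kinds of
// cache misses (allocating a new reader vs. reusing a closed one).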
type namespaceReaderManagerMetrics struct {
	cacheHit              tally.Counter
	cacheMissAllocReader  tally.Counter
	cacheMissReusedReader tally.Counter
}

func newNamespaceReaderManagerMetrics(
	scope tally.Scope,
) namespaceReaderManagerMetrics {
	subScope := scope.SubScope("reader-cache")
	return namespaceReaderManagerMetrics{
		cacheHit: subScope.Counter("hit"),
		cacheMissAllocReader: subScope.Tagged(map[string]string{
			"miss_type": "alloc_reader",
		}).Counter("miss"),
		cacheMissReusedReader: subScope.Tagged(map[string]string{
			"miss_type": "reuse_reader",
		}).Counter("miss"),
	}
}

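// newNamespaceReaderManager creates a namespaceReaderManager for the given
// namespace and registers it as a leaser with the block lease manager.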
func newNamespaceReaderManager(
	namespace namespace.Metadata,
	namespaceScope tally.Scope,
	opts Options,
) databaseNamespaceReaderManager {
	blm := opts.BlockLeaseManager()
	mgr := &namespaceReaderManager{
		filesetExistsFn:   fs.DataFileSetExists,
		newReaderFn:       fs.NewReader,
		namespace:         namespace,
		fsOpts:            opts.CommitLogOptions().FilesystemOptions(),
		blockLeaseManager: blm,
		bytesPool:         opts.BytesPool(),
		logger:            opts.InstrumentOptions().Logger(),
		openReaders:       make(map[cachedOpenReaderKey]cachedReader),
		shardSet:          sharding.NewEmptyShardSet(sharding.DefaultHashFn(1)),
		metrics:           newNamespaceReaderManagerMetrics(namespaceScope),
	}

	blm.RegisterLeaser(mgr)

	return mgr
}

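// latestVolume returns the latest fileset volume for the given shard and
// block start according to the block lease manager.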
func (m *namespaceReaderManager) latestVolume(
	shard uint32,
	blockStart xtime.UnixNano,
) (int, error) {
	state, err := m.blockLeaseManager.OpenLatestLease(m, block.LeaseDescriptor{
		Namespace:  m.namespace.ID(),
		Shard:      shard,
		BlockStart: blockStart,
	})
	if err != nil {
		return -1, err
	}

	return state.Volume, nil
}

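// filesetExistsAt reports whether a data fileset exists on disk for the
// latest volume of the given shard and block start.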
func (m *namespaceReaderManager) filesetExistsAt(
	shard uint32,
	blockStart xtime.UnixNano,
) (bool, error) {
	latestVolume, err := m.latestVolume(shard, blockStart)
	if err != nil {
		return false, err
	}

	return m.filesetExistsFn(m.fsOpts.FilePathPrefix(),
		m.namespace.ID(), shard, blockStart, latestVolume)
}

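// assignShardSet updates the shard set used to decide whether cached
// readers still belong to shards owned by this node.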
func (m *namespaceReaderManager) assignShardSet(shardSet sharding.ShardSet) {
	m.Lock()
	defer m.Unlock()
	m.shardSet = shardSet
}

func (m *namespaceReaderManager) shardExistsWithLock(shard uint32) bool {
	_, err := m.shardSet.LookupStateByID(shard)
	// NB(bodu): LookupStateByID returns ErrInvalidShardID when shard
	// does not exist in the shard map which means the shard is not available.
	return err == nil
}

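// cachedReaderForKeyResult holds either an open reader that matched the
// requested key exactly or a closed reader that must be (re)opened.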
type cachedReaderForKeyResult struct {
	openReader   fs.DataFileSetReader
	closedReader fs.DataFileSetReader
}

func (m *namespaceReaderManager) pushClosedReaderWithLock(
	reader fs.DataFileSetReader,
) {
	m.closedReaders = append(m.closedReaders, cachedReader{
		reader: reader,
	})
}

func (m *namespaceReaderManager) popClosedReaderWithLock() fs.DataFileSetReader {
	idx := len(m.closedReaders) - 1
	reader := m.closedReaders[idx].reader
	// Zero refs from element in slice and shrink slice
	m.closedReaders[idx] = cachedReader{}
	m.closedReaders = m.closedReaders[:idx]
	return reader
}

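// cachedReaderForKey returns an open reader cached at exactly the given key
// if one exists, otherwise a closed reader (reused or newly allocated) that
// the caller is expected to open and seek itself.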
func (m *namespaceReaderManager) cachedReaderForKey(
	key cachedOpenReaderKey,
) (cachedReaderForKeyResult, error) {
	m.Lock()
	defer m.Unlock()

	openReader, ok := m.openReaders[key]
	if ok {
		// Cache hit, take this open reader
		delete(m.openReaders, key)

		m.metrics.cacheHit.Inc(1)

		return cachedReaderForKeyResult{
			openReader: openReader.reader,
		}, nil
	}

	// Cache miss, need to return a reused reader or open a new reader
	if len(m.closedReaders) > 0 {
		reader := m.popClosedReaderWithLock()

		m.metrics.cacheMissReusedReader.Inc(1)
		return cachedReaderForKeyResult{
			closedReader: reader,
		}, nil
	}

	reader, err := m.newReaderFn(m.bytesPool, m.fsOpts)
	if err != nil {
		return cachedReaderForKeyResult{}, err
	}

	m.metrics.cacheMissAllocReader.Inc(1)
	return cachedReaderForKeyResult{
		closedReader: reader,
	}, nil
}

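// get returns a reader for the given shard and block start positioned at the
// requested data/metadata offsets within the latest volume, reusing a cached
// open reader when one exists at exactly that position.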
func (m *namespaceReaderManager) get(
	shard uint32,
	blockStart xtime.UnixNano,
	position readerPosition,
) (fs.DataFileSetReader, error) {
	latestVolume, err := m.latestVolume(shard, blockStart)
	if err != nil {
		return nil, err
	}

	// If requesting an outdated volume, we need to start reading again from
	// the beginning of the latest volume. The caller knows how to handle
	// duplicate metadata, so doing this is okay.
	//
	// The previously cached reader for the outdated volume will eventually be
	// cleaned up either during the ticking process or the next time
	// UpdateOpenLease gets called, so we don't need to worry about closing it
	// here.
	if position.volume < latestVolume {
		position.volume = latestVolume
		position.dataIdx = 0
		position.metadataIdx = 0
	}

	key := cachedOpenReaderKey{
		shard:      shard,
		blockStart: blockStart,
		position:   position,
	}

	lookup, err := m.cachedReaderForKey(key)
	if err != nil {
		return nil, err
	}
	if reader := lookup.openReader; reader != nil {
		return reader, nil // Found an open reader for the position
	}

	// We have a closed reader from the cache (either a cached closed
	// reader or newly allocated, either way we need to prepare it)
	reader := lookup.closedReader

	openOpts := fs.DataReaderOpenOptions{
		Identifier: fs.FileSetFileIdentifier{
			Namespace:   m.namespace.ID(),
			Shard:       shard,
			BlockStart:  blockStart,
			VolumeIndex: latestVolume,
		},
	}
	if err := reader.Open(openOpts); err != nil {
		return nil, err
	}

	// We can validate metadata immediately since it's read when opened
	if err := reader.ValidateMetadata(); err != nil {
		return nil, err
	}

	// Fast-forward through if in the middle of a volume
	for i := 0; i < position.dataIdx; i++ {
		id, tags, data, _, err := reader.Read()
		if err != nil {
			return nil, err
		}
		id.Finalize()
		tags.Close()
		data.Finalize()
	}
	for i := 0; i < position.metadataIdx; i++ {
		id, tags, _, _, err := reader.ReadMetadata()
		if err != nil {
			return nil, err
		}
		id.Finalize()
		tags.Close()
	}

	return reader, nil
}

func (m *namespaceReaderManager) closeAndPushReaderWithLock(reader fs.DataFileSetReader) error {
	if err := reader.Close(); err != nil {
		return err
	}

	m.pushClosedReaderWithLock(reader)
	return nil
}

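// put returns a reader to the manager: open readers for the latest volume
// are cached at their current position for reuse, while closed or stale
// readers are closed (if needed) and pushed onto the closedReaders pool.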
func (m *namespaceReaderManager) put(reader fs.DataFileSetReader) error {
	status := reader.Status()

	m.Lock()
	defer m.Unlock()

	if !status.Open {
		m.pushClosedReaderWithLock(reader)
		return nil
	}

	shard := status.Shard

	latestVolume, err := m.latestVolume(shard, status.BlockStart)
	if err != nil {
		return err
	}

	// If the supplied reader is for a stale volume, then it will never be
	// reused in its current state. Instead, put it in the closed reader pool
	// so that it can be reconfigured to be reopened later.
	if latestVolume > status.Volume {
		if err := m.closeAndPushReaderWithLock(reader); err != nil {
			// Best effort on closing the reader and caching it. If it fails,
			// we can always allocate a new reader.
			m.logger.Error("error closing reader on put from reader cache", zap.Error(err))
		}
		return nil
	}

	key := cachedOpenReaderKey{
		shard:      shard,
		blockStart: status.BlockStart,
		position: readerPosition{
			volume:      status.Volume,
			dataIdx:     reader.EntriesRead(),
			metadataIdx: reader.MetadataRead(),
		},
	}

	if _, ok := m.openReaders[key]; ok {
		// There is already an open reader cached for this key. We don't need
		// a duplicate one, so close the reader and push to slice of closed
		// readers.
		if err := m.closeAndPushReaderWithLock(reader); err != nil {
			// Best effort on closing the reader and caching it. If it fails,
			// we can always allocate a new reader.
			m.logger.Error("error closing reader on put from reader cache", zap.Error(err))
		}
		return nil
	}

	m.openReaders[key] = cachedReader{reader: reader}

	return nil
}

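// tick ages cached readers and expires any that have gone unused for
// expireCachedReadersAfterNumTicks ticks.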
func (m *namespaceReaderManager) tick() {
	m.tickWithThreshold(expireCachedReadersAfterNumTicks)
}

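// close unregisters the manager as a leaser and expires all cached readers.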
func (m *namespaceReaderManager) close() {
	m.blockLeaseManager.UnregisterLeaser(m)

	// Perform a tick but make the threshold zero so all readers must be expired
	m.tickWithThreshold(0)
}

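// tickWithThreshold increments the ticks-since-used counter on every cached
// reader and expires those at or above the threshold, as well as open
// readers whose shard is no longer owned by this node.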
func (m *namespaceReaderManager) tickWithThreshold(threshold int) {
	m.Lock()
	defer m.Unlock()

	// First increment ticks since used for closed readers
	expiredClosedReaders := 0
	for i := range m.closedReaders {
		m.closedReaders[i].ticksSinceUsed++
		if m.closedReaders[i].ticksSinceUsed >= threshold {
			expiredClosedReaders++
		}
	}
	// Expire any closed readers, alloc a new slice to avoid spikes
	// of use creating slices that are never released
	if expired := expiredClosedReaders; expired > 0 {
		newClosedReaders := make([]cachedReader, 0, len(m.closedReaders)-expired)
		for _, elem := range m.closedReaders {
			if elem.ticksSinceUsed < threshold {
				newClosedReaders = append(newClosedReaders, elem)
			}
		}
		m.closedReaders = newClosedReaders
	}

	// For open readers calculate and expire from map directly
	for key, elem := range m.openReaders {
		// Mutate the for-loop copy in place before checking the threshold
		elem.ticksSinceUsed++
		if elem.ticksSinceUsed >= threshold ||
			// Also check to see if shard is still available and remove cached readers for
			// shards that are no longer available. This ensures cached readers are eventually
			// consistent with shard state.
			!m.shardExistsWithLock(key.shard) {
			// Close before removing ref
			if err := elem.reader.Close(); err != nil {
				m.logger.Error("error closing reader from reader cache", zap.Error(err))
			}
			delete(m.openReaders, key)
			continue
		}

		// Save the mutated copy back to the map
		m.openReaders[key] = elem
	}
}

// UpdateOpenLease implements block.Leaser.
func (m *namespaceReaderManager) UpdateOpenLease(
	descriptor block.LeaseDescriptor,
	state block.LeaseState,
) (block.UpdateOpenLeaseResult, error) {
	if !m.namespace.ID().Equal(descriptor.Namespace) {
		return block.NoOpenLease, nil
	}

	m.Lock()
	defer m.Unlock()
	// Close and remove open readers with matching key but lower volume.
	for readerKey, cachedReader := range m.openReaders {
		if readerKey.shard == descriptor.Shard &&
			readerKey.blockStart == descriptor.BlockStart &&
			readerKey.position.volume < state.Volume {
			delete(m.openReaders, readerKey)
			if err := m.closeAndPushReaderWithLock(cachedReader.reader); err != nil {
				// Best effort on closing the reader and caching it. If it
				// fails, we can always allocate a new reader.
				m.logger.Error("error closing reader on update open lease from reader cache", zap.Error(err))
			}
		}
	}

	return block.UpdateOpenLease, nil
}