github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/coldflush.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package storage
    22  
    23  import (
    24  	"sync"
    25  
    26  	"github.com/m3db/m3/src/dbnode/persist"
    27  	xerrors "github.com/m3db/m3/src/x/errors"
    28  	"github.com/m3db/m3/src/x/instrument"
    29  	xtime "github.com/m3db/m3/src/x/time"
    30  
    31  	"github.com/uber-go/tally"
    32  	"go.uber.org/zap"
    33  	"go.uber.org/zap/zapcore"
    34  )
    35  
// coldFlushManager runs cold flushes for a database. It embeds a
// databaseCleanupManager (used for cold flush cleanup and reporting) and a
// sync.RWMutex; the methods in this file take that mutex whenever they read
// or write status and enabled.
type coldFlushManager struct {
	databaseCleanupManager
	sync.RWMutex

	log      *zap.Logger
	database database
	pm       persist.Manager
	opts     Options
	// Retain using fileOpStatus here to be consistent w/ the
	// filesystem manager since both are filesystem processes.
	status         fileOpStatus
	isColdFlushing tally.Gauge
	enabled        bool
}
    50  
    51  func newColdFlushManager(
    52  	database database,
    53  	pm persist.Manager,
    54  	opts Options,
    55  ) databaseColdFlushManager {
    56  	instrumentOpts := opts.InstrumentOptions()
    57  	scope := instrumentOpts.MetricsScope().SubScope("fs")
    58  	// NB(bodu): cold flush cleanup doesn't require commit logs.
    59  	cm := newCleanupManager(database, nil, scope)
    60  
    61  	return &coldFlushManager{
    62  		databaseCleanupManager: cm,
    63  		log:                    instrumentOpts.Logger(),
    64  		database:               database,
    65  		pm:                     pm,
    66  		opts:                   opts,
    67  		status:                 fileOpNotStarted,
    68  		isColdFlushing:         scope.Gauge("cold-flush"),
    69  		enabled:                true,
    70  	}
    71  }
    72  
    73  func (m *coldFlushManager) Disable() fileOpStatus {
    74  	m.Lock()
    75  	status := m.status
    76  	m.enabled = false
    77  	m.Unlock()
    78  	return status
    79  }
    80  
    81  func (m *coldFlushManager) Enable() fileOpStatus {
    82  	m.Lock()
    83  	status := m.status
    84  	m.enabled = true
    85  	m.Unlock()
    86  	return status
    87  }
    88  
    89  func (m *coldFlushManager) Status() fileOpStatus {
    90  	m.RLock()
    91  	status := m.status
    92  	m.RUnlock()
    93  	return status
    94  }
    95  
    96  func (m *coldFlushManager) Run(t xtime.UnixNano) bool {
    97  	m.Lock()
    98  	if !m.shouldRunWithLock() {
    99  		m.Unlock()
   100  		return false
   101  	}
   102  	m.status = fileOpInProgress
   103  	m.Unlock()
   104  
   105  	defer func() {
   106  		m.Lock()
   107  		m.status = fileOpNotStarted
   108  		m.Unlock()
   109  	}()
   110  
   111  	if log := m.log.Check(zapcore.DebugLevel, "cold flush run start"); log != nil {
   112  		log.Write(zap.Time("time", t.ToTime()))
   113  	}
   114  
   115  	// NB(xichen): perform data cleanup and flushing sequentially to minimize the impact of disk seeks.
   116  	// NB(r): Use invariant here since flush errors were introduced
   117  	// and not caught in CI or integration tests.
   118  	// When an invariant occurs in CI tests it panics so as to fail
   119  	// the build.
   120  	if err := m.ColdFlushCleanup(t); err != nil {
   121  		instrument.EmitAndLogInvariantViolation(m.opts.InstrumentOptions(),
   122  			func(l *zap.Logger) {
   123  				l.Error("error when cleaning up cold flush data",
   124  					zap.Time("time", t.ToTime()), zap.Error(err))
   125  			})
   126  	}
   127  	if err := m.trackedColdFlush(); err != nil {
   128  		instrument.EmitAndLogInvariantViolation(m.opts.InstrumentOptions(),
   129  			func(l *zap.Logger) {
   130  				l.Error("error when cold flushing data",
   131  					zap.Time("time", t.ToTime()), zap.Error(err))
   132  			})
   133  	}
   134  
   135  	if log := m.log.Check(zapcore.DebugLevel, "cold flush run complete"); log != nil {
   136  		log.Write(zap.Time("time", t.ToTime()))
   137  	}
   138  
   139  	return true
   140  }
   141  
   142  func (m *coldFlushManager) trackedColdFlush() error {
   143  	// The cold flush process will persist any data that has been "loaded" into memory via
   144  	// the Load() API but has not yet been persisted durably. As a result, if the cold flush
   145  	// process completes without error, then we want to "decrement" the number of tracked bytes
   146  	// by however many were outstanding right before the cold flush began.
   147  	//
   148  	// For example:
   149  	// t0: Load 100 bytes --> (numLoadedBytes == 100, numPendingLoadedBytes == 0)
   150  	// t1: memTracker.MarkLoadedAsPending() --> (numLoadedBytes == 100, numPendingLoadedBytes == 100)
   151  	// t2: Load 200 bytes --> (numLoadedBytes == 300, numPendingLoadedBytes == 100)
   152  	// t3: ColdFlushStart()
   153  	// t4: Load 300 bytes --> (numLoadedBytes == 600, numPendingLoadedBytes == 100)
   154  	// t5: ColdFlushEnd()
   155  	// t6: memTracker.DecPendingLoadedBytes() --> (numLoadedBytes == 500, numPendingLoadedBytes == 0)
   156  	// t7: memTracker.MarkLoadedAsPending() --> (numLoadedBytes == 500, numPendingLoadedBytes == 500)
   157  	// t8: ColdFlushStart()
   158  	// t9: ColdFlushError()
   159  	// t10: memTracker.MarkLoadedAsPending() --> (numLoadedBytes == 500, numPendingLoadedBytes == 500)
   160  	// t11: ColdFlushStart()
   161  	// t12: ColdFlushEnd()
   162  	// t13: memTracker.DecPendingLoadedBytes() --> (numLoadedBytes == 0, numPendingLoadedBytes == 0)
   163  	memTracker := m.opts.MemoryTracker()
   164  	memTracker.MarkLoadedAsPending()
   165  
   166  	if err := m.coldFlush(); err != nil {
   167  		return err
   168  	}
   169  
   170  	// Only decrement if the cold flush was a success. In this case, the decrement will reduce the
   171  	// value by however many bytes had been tracked when the cold flush began.
   172  	memTracker.DecPendingLoadedBytes()
   173  	return nil
   174  }
   175  
   176  func (m *coldFlushManager) coldFlush() error {
   177  	namespaces, err := m.database.OwnedNamespaces()
   178  	if err != nil {
   179  		return err
   180  	}
   181  
   182  	flushPersist, err := m.pm.StartFlushPersist()
   183  	if err != nil {
   184  		return err
   185  	}
   186  
   187  	multiErr := xerrors.NewMultiError()
   188  	for _, ns := range namespaces {
   189  		if err = ns.ColdFlush(flushPersist); err != nil {
   190  			multiErr = multiErr.Add(err)
   191  		}
   192  	}
   193  
   194  	multiErr = multiErr.Add(flushPersist.DoneFlush())
   195  	err = multiErr.FinalError()
   196  	return err
   197  }
   198  
   199  func (m *coldFlushManager) Report() {
   200  	m.databaseCleanupManager.Report()
   201  
   202  	m.RLock()
   203  	status := m.status
   204  	m.RUnlock()
   205  	if status == fileOpInProgress {
   206  		m.isColdFlushing.Update(1)
   207  	} else {
   208  		m.isColdFlushing.Update(0)
   209  	}
   210  }
   211  
   212  func (m *coldFlushManager) shouldRunWithLock() bool {
   213  	return m.enabled && m.status != fileOpInProgress && m.database.IsBootstrapped()
   214  }
   215  
// coldFlushNsOpts is the value returned by NewColdFlushNsOpts; it stores the
// reuseResources flag exposed through ReuseResources.
type coldFlushNsOpts struct {
	reuseResources bool
}
   219  
   220  // NewColdFlushNsOpts returns a new ColdFlushNsOpts.
   221  func NewColdFlushNsOpts(reuseResources bool) ColdFlushNsOpts {
   222  	return &coldFlushNsOpts{reuseResources: reuseResources}
   223  }
   224  
// ReuseResources returns the reuseResources value these options were
// constructed with.
func (o *coldFlushNsOpts) ReuseResources() bool {
	return o.reuseResources
}