github.com/m3db/m3@v1.5.0/src/dbnode/storage/coldflush.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package storage
    22  
    23  import (
    24  	"sync"
    25  
    26  	"github.com/m3db/m3/src/dbnode/persist"
    27  	xerrors "github.com/m3db/m3/src/x/errors"
    28  	"github.com/m3db/m3/src/x/instrument"
    29  	xtime "github.com/m3db/m3/src/x/time"
    30  
    31  	"github.com/uber-go/tally"
    32  	"go.uber.org/zap"
    33  	"go.uber.org/zap/zapcore"
    34  )
    35  
// coldFlushManager runs the database cold flush process: cleaning up
// cold flush artifacts and persisting cold (out-of-order) data to disk.
// It embeds databaseCleanupManager so cleanup methods are provided by
// composition, and sync.RWMutex to guard the mutable fields below.
type coldFlushManager struct {
	databaseCleanupManager
	sync.RWMutex

	log      *zap.Logger
	database database
	pm       persist.Manager
	opts     Options
	// Retain using fileOpStatus here to be consistent w/ the
	// filesystem manager since both are filesystem processes.
	status         fileOpStatus
	isColdFlushing tally.Gauge
	// enabled gates Run; toggled by Enable/Disable under the lock.
	enabled bool
}
    50  
    51  func newColdFlushManager(
    52  	database database,
    53  	pm persist.Manager,
    54  	opts Options,
    55  ) databaseColdFlushManager {
    56  	instrumentOpts := opts.InstrumentOptions()
    57  	scope := instrumentOpts.MetricsScope().SubScope("fs")
    58  	// NB(bodu): cold flush cleanup doesn't require commit logs.
    59  	cm := newCleanupManager(database, nil, scope)
    60  
    61  	return &coldFlushManager{
    62  		databaseCleanupManager: cm,
    63  		log:                    instrumentOpts.Logger(),
    64  		database:               database,
    65  		pm:                     pm,
    66  		opts:                   opts,
    67  		status:                 fileOpNotStarted,
    68  		isColdFlushing:         scope.Gauge("cold-flush"),
    69  		enabled:                true,
    70  	}
    71  }
    72  
    73  func (m *coldFlushManager) Disable() fileOpStatus {
    74  	m.Lock()
    75  	status := m.status
    76  	m.enabled = false
    77  	m.Unlock()
    78  	return status
    79  }
    80  
    81  func (m *coldFlushManager) Enable() fileOpStatus {
    82  	m.Lock()
    83  	status := m.status
    84  	m.enabled = true
    85  	m.Unlock()
    86  	return status
    87  }
    88  
    89  func (m *coldFlushManager) Status() fileOpStatus {
    90  	m.RLock()
    91  	status := m.status
    92  	m.RUnlock()
    93  	return status
    94  }
    95  
    96  func (m *coldFlushManager) Run(t xtime.UnixNano) bool {
    97  	m.Lock()
    98  	if !m.shouldRunWithLock() {
    99  		m.Unlock()
   100  		return false
   101  	}
   102  	m.status = fileOpInProgress
   103  	m.Unlock()
   104  
   105  	defer func() {
   106  		m.Lock()
   107  		m.status = fileOpNotStarted
   108  		m.Unlock()
   109  	}()
   110  
   111  	debugLog := m.log.Check(zapcore.DebugLevel, "cold flush run")
   112  	if debugLog != nil {
   113  		debugLog.Write(zap.String("status", "starting cold flush"), zap.Time("time", t.ToTime()))
   114  	}
   115  
   116  	// NB(xichen): perform data cleanup and flushing sequentially to minimize the impact of disk seeks.
   117  	// NB(r): Use invariant here since flush errors were introduced
   118  	// and not caught in CI or integration tests.
   119  	// When an invariant occurs in CI tests it panics so as to fail
   120  	// the build.
   121  	if err := m.ColdFlushCleanup(t); err != nil {
   122  		instrument.EmitAndLogInvariantViolation(m.opts.InstrumentOptions(),
   123  			func(l *zap.Logger) {
   124  				l.Error("error when cleaning up cold flush data",
   125  					zap.Time("time", t.ToTime()), zap.Error(err))
   126  			})
   127  	}
   128  	if err := m.trackedColdFlush(); err != nil {
   129  		instrument.EmitAndLogInvariantViolation(m.opts.InstrumentOptions(),
   130  			func(l *zap.Logger) {
   131  				l.Error("error when cold flushing data",
   132  					zap.Time("time", t.ToTime()), zap.Error(err))
   133  			})
   134  	}
   135  
   136  	if debugLog != nil {
   137  		debugLog.Write(zap.String("status", "completed cold flush"), zap.Time("time", t.ToTime()))
   138  	}
   139  
   140  	return true
   141  }
   142  
   143  func (m *coldFlushManager) trackedColdFlush() error {
   144  	// The cold flush process will persist any data that has been "loaded" into memory via
   145  	// the Load() API but has not yet been persisted durably. As a result, if the cold flush
   146  	// process completes without error, then we want to "decrement" the number of tracked bytes
   147  	// by however many were outstanding right before the cold flush began.
   148  	//
   149  	// For example:
   150  	// t0: Load 100 bytes --> (numLoadedBytes == 100, numPendingLoadedBytes == 0)
   151  	// t1: memTracker.MarkLoadedAsPending() --> (numLoadedBytes == 100, numPendingLoadedBytes == 100)
   152  	// t2: Load 200 bytes --> (numLoadedBytes == 300, numPendingLoadedBytes == 100)
   153  	// t3: ColdFlushStart()
   154  	// t4: Load 300 bytes --> (numLoadedBytes == 600, numPendingLoadedBytes == 100)
   155  	// t5: ColdFlushEnd()
   156  	// t6: memTracker.DecPendingLoadedBytes() --> (numLoadedBytes == 500, numPendingLoadedBytes == 0)
   157  	// t7: memTracker.MarkLoadedAsPending() --> (numLoadedBytes == 500, numPendingLoadedBytes == 500)
   158  	// t8: ColdFlushStart()
   159  	// t9: ColdFlushError()
   160  	// t10: memTracker.MarkLoadedAsPending() --> (numLoadedBytes == 500, numPendingLoadedBytes == 500)
   161  	// t11: ColdFlushStart()
   162  	// t12: ColdFlushEnd()
   163  	// t13: memTracker.DecPendingLoadedBytes() --> (numLoadedBytes == 0, numPendingLoadedBytes == 0)
   164  	memTracker := m.opts.MemoryTracker()
   165  	memTracker.MarkLoadedAsPending()
   166  
   167  	if err := m.coldFlush(); err != nil {
   168  		return err
   169  	}
   170  
   171  	// Only decrement if the cold flush was a success. In this case, the decrement will reduce the
   172  	// value by however many bytes had been tracked when the cold flush began.
   173  	memTracker.DecPendingLoadedBytes()
   174  	return nil
   175  }
   176  
   177  func (m *coldFlushManager) coldFlush() error {
   178  	namespaces, err := m.database.OwnedNamespaces()
   179  	if err != nil {
   180  		return err
   181  	}
   182  
   183  	flushPersist, err := m.pm.StartFlushPersist()
   184  	if err != nil {
   185  		return err
   186  	}
   187  
   188  	multiErr := xerrors.NewMultiError()
   189  	for _, ns := range namespaces {
   190  		if err = ns.ColdFlush(flushPersist); err != nil {
   191  			multiErr = multiErr.Add(err)
   192  		}
   193  	}
   194  
   195  	multiErr = multiErr.Add(flushPersist.DoneFlush())
   196  	err = multiErr.FinalError()
   197  	return err
   198  }
   199  
   200  func (m *coldFlushManager) Report() {
   201  	m.databaseCleanupManager.Report()
   202  
   203  	m.RLock()
   204  	status := m.status
   205  	m.RUnlock()
   206  	if status == fileOpInProgress {
   207  		m.isColdFlushing.Update(1)
   208  	} else {
   209  		m.isColdFlushing.Update(0)
   210  	}
   211  }
   212  
   213  func (m *coldFlushManager) shouldRunWithLock() bool {
   214  	return m.enabled && m.status != fileOpInProgress && m.database.IsBootstrapped()
   215  }
   216  
// coldFlushNsOpts is the ColdFlushNsOpts implementation returned by
// NewColdFlushNsOpts; it carries only the reuse-resources flag.
type coldFlushNsOpts struct {
	reuseResources bool
}
   220  
   221  // NewColdFlushNsOpts returns a new ColdFlushNsOpts.
   222  func NewColdFlushNsOpts(reuseResources bool) ColdFlushNsOpts {
   223  	return &coldFlushNsOpts{reuseResources: reuseResources}
   224  }
   225  
   226  func (o *coldFlushNsOpts) ReuseResources() bool {
   227  	return o.reuseResources
   228  }