vitess.io/vitess@v0.16.2/go/vt/throttler/max_replication_lag_module.go

/*
Copyright 2019 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package throttler

import (
	"fmt"
	"math"
	"sync"
	"time"

	"google.golang.org/protobuf/proto"

	"vitess.io/vitess/go/sync2"
	"vitess.io/vitess/go/vt/discovery"
	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/topo/topoproto"

	throttlerdatapb "vitess.io/vitess/go/vt/proto/throttlerdata"
	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)

type state string

const (
	stateIncreaseRate         state = "I"
	stateDecreaseAndGuessRate state = "D"
	stateEmergency            state = "E"
)

type replicationLagChange int

const (
	unknown replicationLagChange = iota
	less
	equal
	greater
)

// replicaUnderTest refers to the replica to which we're currently "locked in"
// i.e. we'll ignore lag records with lower lag from other replicas while we're
// waiting for the next record of this replica under test.
type replicaUnderTest struct {
	// key holds the key value for the replica.
	key        string
	alias      string
	tabletType topodatapb.TabletType
	// state is the "state under test" which triggered the rate change.
	state             state
	nextAllowedChange time.Time
}

// memoryGranularity is the granularity at which values are rounded down in the
// memory instance. This is done to bucket similar rates (see also memory.go).
const memoryGranularity = 5

// MaxReplicationLagModule calculates the maximum rate based on observed
// replication lag and throttler rate changes.
// It implements the Module interface.
// Unless specified, the fields below are not guarded by a Mutex because they
// are only accessed within the Go routine running ProcessRecords().
type MaxReplicationLagModule struct {
	// config holds all parameters for this module.
	// It is only accessed by the Go routine which runs ProcessRecords() and does
	// not require locking.
	config MaxReplicationLagModuleConfig

	// initialMaxReplicationLagSec is the initial value of
	// config.MaxReplicationLagSec with which the module was started. We remember
	// it in case we need to reset the configuration.
	initialMaxReplicationLagSec int64

	// mutableConfigMu guards all fields in the group below.
	mutableConfigMu sync.Mutex
	// mutableConfig is the mutable copy of "config" which is currently used by
	// the module. By modifying "mutableConfig" and setting "applyMutableConfig"
	// to true, the next ProcessRecords() execution will copy "mutableConfig"
	// to "config" and become effective in the module.
	mutableConfig MaxReplicationLagModuleConfig
	// applyMutableConfig is set to true when ProcessRecords() should copy over
	// "mutableConfig" to "config".
	applyMutableConfig bool

	// rate is the rate calculated for the throttler.
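	// It is stored in an atomic value so that MaxRate() can read it
	// concurrently with updates made by ProcessRecords().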
	rate         sync2.AtomicInt64
	currentState state
	// lastRateChange is the time when rate was adjusted last.
	lastRateChange             time.Time
	replicaUnderTest           *replicaUnderTest
	nextAllowedChangeAfterInit time.Time

	actualRatesHistory *aggregatedIntervalHistory
	// replicaLagCache stores the past lag records for all REPLICA tablets.
	replicaLagCache *replicationLagCache
	// rdonlyLagCache stores the past lag records for all RDONLY tablets.
	rdonlyLagCache *replicationLagCache
	// memory tracks known good and bad throttler rates.
	memory *memory

	// rateUpdateChan is the notification channel to tell the throttler when our
	// max rate calculation has changed. The field is immutable (set in Start().)
	rateUpdateChan chan<- struct{}

	// lagRecords buffers the replication lag records received by the HealthCheck
	// subscriber. ProcessRecords() will process them.
	lagRecords chan replicationLagRecord
	wg         sync.WaitGroup

	// results caches the results of the latest processed replication lag records.
	results *resultRing
}

// NewMaxReplicationLagModule will create a new module instance and set the
// initial max replication lag limit to config.MaxReplicationLagSec.
func NewMaxReplicationLagModule(config MaxReplicationLagModuleConfig, actualRatesHistory *aggregatedIntervalHistory, nowFunc func() time.Time) (*MaxReplicationLagModule, error) {
	if err := config.Verify(); err != nil {
		return nil, fmt.Errorf("invalid NewMaxReplicationLagModuleConfig: %v", err)
	}
	rate := int64(ReplicationLagModuleDisabled)
	if config.MaxReplicationLagSec != ReplicationLagModuleDisabled {
		rate = config.InitialRate
	}

	m := &MaxReplicationLagModule{
		initialMaxReplicationLagSec: config.MaxReplicationLagSec,
		// Register "config" for a future config update.
		mutableConfig:      config,
		applyMutableConfig: true,
		// Always start off with a non-zero rate because zero means all requests
		// get throttled.
		rate:           sync2.NewAtomicInt64(rate),
		currentState:   stateIncreaseRate,
		lastRateChange: nowFunc(),
		memory:         newMemory(memoryGranularity, config.AgeBadRateAfter(), config.BadRateIncrease),
		lagRecords:     make(chan replicationLagRecord, 10),
		// Prevent an immediate increase of the initial rate.
		nextAllowedChangeAfterInit: nowFunc().Add(config.MaxDurationBetweenIncreases()),
		actualRatesHistory:         actualRatesHistory,
		replicaLagCache:            newReplicationLagCache(1000),
		rdonlyLagCache:             newReplicationLagCache(1000),
		results:                    newResultRing(1000),
	}

	// Enforce a config update.
	m.applyLatestConfig()

	return m, nil
}

// Start launches a Go routine which reacts to replication lag records.
// It implements the Module interface.
func (m *MaxReplicationLagModule) Start(rateUpdateChan chan<- struct{}) {
	m.rateUpdateChan = rateUpdateChan
	m.wg.Add(1)
	go m.ProcessRecords()
}

// Stop blocks until the module's Go routine is stopped.
// It implements the Module interface.
func (m *MaxReplicationLagModule) Stop() {
	close(m.lagRecords)
	m.wg.Wait()
}

// MaxRate returns the current maximum allowed rate.
// It implements the Module interface.
func (m *MaxReplicationLagModule) MaxRate() int64 {
	return m.rate.Get()
}

// applyLatestConfig checks if "mutableConfig" should be applied as the new
// "config" and does so when necessary.
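// Pending changes are queued by updateConfiguration() and resetConfiguration()
// and take effect when the next lag record is processed.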
func (m *MaxReplicationLagModule) applyLatestConfig() {
	var config MaxReplicationLagModuleConfig
	applyConfig := false

	m.mutableConfigMu.Lock()
	if m.applyMutableConfig {
		config = m.mutableConfig
		applyConfig = true
		m.applyMutableConfig = false
	}
	m.mutableConfigMu.Unlock()

	// No locking required here because this method is only called from the same
	// Go routine as ProcessRecords() or the constructor.
	if applyConfig {
		m.config = config
		m.memory.updateAgingConfiguration(config.AgeBadRateAfter(), config.BadRateIncrease)
	}
}

func (m *MaxReplicationLagModule) getConfiguration() *throttlerdatapb.Configuration {
	m.mutableConfigMu.Lock()
	defer m.mutableConfigMu.Unlock()
	return proto.Clone(m.mutableConfig.Configuration).(*throttlerdatapb.Configuration)
}

func (m *MaxReplicationLagModule) updateConfiguration(configuration *throttlerdatapb.Configuration, copyZeroValues bool) error {
	m.mutableConfigMu.Lock()
	defer m.mutableConfigMu.Unlock()

	newConfig := m.mutableConfig

	if copyZeroValues {
		newConfig.Configuration = proto.Clone(configuration).(*throttlerdatapb.Configuration)
	} else {
		proto.Merge(newConfig.Configuration, configuration)
	}

	if err := newConfig.Verify(); err != nil {
		return err
	}
	m.mutableConfig = newConfig
	m.applyMutableConfig = true
	return nil
}

func (m *MaxReplicationLagModule) resetConfiguration() {
	m.mutableConfigMu.Lock()
	defer m.mutableConfigMu.Unlock()

	m.mutableConfig = NewMaxReplicationLagModuleConfig(m.initialMaxReplicationLagSec)
	m.applyMutableConfig = true
}

// RecordReplicationLag records the current replication lag for processing.
func (m *MaxReplicationLagModule) RecordReplicationLag(t time.Time, th *discovery.TabletHealth) {
	m.mutableConfigMu.Lock()
	if m.mutableConfig.MaxReplicationLagSec == ReplicationLagModuleDisabled {
		m.mutableConfigMu.Unlock()
		return
	}
	m.mutableConfigMu.Unlock()

	// Buffer data point for now to unblock the HealthCheck subscriber and process
	// it asynchronously in ProcessRecords().
	m.lagRecords <- replicationLagRecord{t, *th}
}

// ProcessRecords is the main loop, run in a separate Go routine, which
// reacts to any replication lag updates (recordings).
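// It returns once the lagRecords channel has been closed by Stop().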
func (m *MaxReplicationLagModule) ProcessRecords() {
	defer m.wg.Done()

	for lagRecord := range m.lagRecords {
		m.processRecord(lagRecord)
	}
}

func (m *MaxReplicationLagModule) processRecord(lagRecord replicationLagRecord) {
	m.applyLatestConfig()

	m.lagCache(lagRecord).add(lagRecord)

	m.lagCache(lagRecord).sortByLag(m.getNSlowestReplicasConfig(lagRecord), m.config.MaxReplicationLagSec+1)

	m.recalculateRate(lagRecord)
}

func (m *MaxReplicationLagModule) lagCache(lagRecord replicationLagRecord) *replicationLagCache {
	return m.lagCacheByType(lagRecord.Target.TabletType)
}

func (m *MaxReplicationLagModule) lagCacheByType(tabletType topodatapb.TabletType) *replicationLagCache {
	switch tabletType {
	case topodatapb.TabletType_REPLICA:
		return m.replicaLagCache
	case topodatapb.TabletType_RDONLY:
		return m.rdonlyLagCache
	default:
		panic(fmt.Sprintf("BUG: invalid TabletType forwarded: %v", tabletType))
	}
}

func (m *MaxReplicationLagModule) getNSlowestReplicasConfig(lagRecord replicationLagRecord) int {
	switch lagRecord.Target.TabletType {
	case topodatapb.TabletType_REPLICA:
		return int(m.config.IgnoreNSlowestReplicas)
	case topodatapb.TabletType_RDONLY:
		return int(m.config.IgnoreNSlowestRdonlys)
	default:
		panic(fmt.Sprintf("BUG: invalid TabletType forwarded: %v", lagRecord))
	}
}

func (m *MaxReplicationLagModule) recalculateRate(lagRecordNow replicationLagRecord) {
	if lagRecordNow.isZero() {
		panic("rate recalculation was triggered with a zero replication lag record")
	}
	now := lagRecordNow.time
	lagNow := lagRecordNow.lag()

	m.memory.ageBadRate(now)

	r := result{
		Now:            now,
		RateChange:     unchangedRate,
		lastRateChange: m.lastRateChange,
		OldState:       m.currentState,
		NewState:       m.currentState,
		OldRate:        m.rate.Get(),
		NewRate:        m.rate.Get(),
		LagRecordNow:   lagRecordNow,
	}
	if lagNow <= m.config.TargetReplicationLagSec {
		// Lag in range: [0, target]
		r.TestedState = stateIncreaseRate
	} else if lagNow <= m.config.MaxReplicationLagSec {
		// Lag in range: (target, max]
		r.TestedState = stateDecreaseAndGuessRate
	} else {
		// Lag in range: (max, infinite]
		r.TestedState = stateEmergency
	}

	// Declare new variables before we use "goto". Required by the Go compiler.
	var clear bool
	var clearReason string

	if m.lagCache(lagRecordNow).ignoreSlowReplica(discovery.TabletToMapKey(lagRecordNow.Tablet)) {
		r.Reason = fmt.Sprintf("skipping this replica because it's among the %d slowest %v tablets", m.getNSlowestReplicasConfig(lagRecordNow), lagRecordNow.Target.TabletType.String())
		goto logResult
	}

	clear, clearReason = m.clearReplicaUnderTest(now, r.TestedState, lagRecordNow)
	if clear {
		clearReason = fmt.Sprintf("; previous replica under test (%v) cleared because %v", m.replicaUnderTest.alias, clearReason)
		m.replicaUnderTest = nil
	}

	if r.TestedState == stateIncreaseRate && now.Before(m.nextAllowedChangeAfterInit) {
		r.Reason = fmt.Sprintf("waiting %.1f more seconds since init until we try the first increase", m.nextAllowedChangeAfterInit.Sub(now).Seconds())
		goto logResult
	}

	if !m.isReplicaUnderTest(&r, now, r.TestedState, lagRecordNow) {
		goto logResult
	}

	// Process the lag record and adjust the rate.
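	// At this point either no replica is under test or this record belongs to
	// the replica under test, so we act on it.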
	if m.replicaUnderTest != nil {
		// We're checking the same replica again. The old value is no longer needed.
		m.replicaUnderTest = nil
	}
	switch r.TestedState {
	case stateIncreaseRate:
		m.increaseRate(&r, now, lagRecordNow)
	case stateDecreaseAndGuessRate:
		m.decreaseAndGuessRate(&r, now, lagRecordNow)
	case stateEmergency:
		m.emergency(&r, now, lagRecordNow)
	default:
		panic(fmt.Sprintf("BUG: invalid state: %v", r.TestedState))
	}

logResult:
	r.HighestGood = m.memory.highestGood()
	r.LowestBad = m.memory.lowestBad()

	if clear {
		r.Reason += clearReason
	}

	log.Infof("%v", r)
	m.results.add(r)
}

// clearReplicaUnderTest returns true if the current "replica under test" should
// be cleared e.g. because the new lag record is more severe or we did not hear
// back from the replica under test for a while.
func (m *MaxReplicationLagModule) clearReplicaUnderTest(now time.Time, testedState state, lagRecordNow replicationLagRecord) (bool, string) {
	if m.replicaUnderTest == nil {
		return false, ""
	}

	if stateGreater(testedState, m.replicaUnderTest.state) {
		// Example: "decrease" > "increase" will return true.
		return true, fmt.Sprintf("the new state is more severe (%v -> %v)", m.replicaUnderTest.state, testedState)
	}

	// Verify that the current replica under test is not in an error state.
	lr := lagRecordNow
	if m.replicaUnderTest.key != discovery.TabletToMapKey(lr.Tablet) {
		lr = m.lagCacheByType(m.replicaUnderTest.tabletType).latest(m.replicaUnderTest.key)
	}
	if lr.isZero() {
		// Replica is no longer tracked by the cache i.e. may be offline.
		return true, "it is no longer actively tracked"
	}
	if lr.LastError != nil {
		// LastError is set i.e. HealthCheck module cannot connect and the cached
		// data for the replica might be outdated.
		return true, "it has LastError set i.e. is no longer correctly tracked"
	}

	if m.lagCacheByType(m.replicaUnderTest.tabletType).isIgnored(m.replicaUnderTest.key) {
		// "replica under test" has become a slow, ignored replica.
		return true, "it is ignored as a slow replica"
	}

	if now.After(m.replicaUnderTest.nextAllowedChange.Add(m.config.MaxDurationBetweenIncreases())) {
		// We haven't heard from the replica under test for too long. Assume it
		// timed out.
		return true, fmt.Sprintf("we didn't see a recent record from it within the last %.1f seconds", m.config.MaxDurationBetweenIncreases().Seconds())
	}

	return false, ""
}

// stateGreater returns true if a > b i.e. the state "a" is more severe than
// "b". For example, "decrease" > "increase" returns true.
func stateGreater(a, b state) bool {
	switch a {
	case stateIncreaseRate:
		return false
	case stateDecreaseAndGuessRate:
		return b == stateIncreaseRate
	case stateEmergency:
		return b == stateIncreaseRate || b == stateDecreaseAndGuessRate
	default:
		panic(fmt.Sprintf("BUG: cannot compare states: %v and %v", a, b))
	}
}

// isReplicaUnderTest returns true if the current replica ("lagRecordNow")
// should be processed i.e. no replica is under test or "lagRecordNow" belongs
// to the replica under test.
// Even if it's the same replica we may skip it and return false because
// we want to wait longer for the propagation of the current rate change.
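// If the record is skipped, the reason is stored in r.Reason.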
func (m *MaxReplicationLagModule) isReplicaUnderTest(r *result, now time.Time, testedState state, lagRecordNow replicationLagRecord) bool {
	if m.replicaUnderTest == nil {
		return true
	}

	if m.replicaUnderTest.key != discovery.TabletToMapKey(lagRecordNow.Tablet) {
		r.Reason = fmt.Sprintf("skipping this replica because we're waiting for the next lag record from the 'replica under test': %v", m.replicaUnderTest.alias)
		return false
	}

	if stateGreater(m.replicaUnderTest.state, testedState) {
		// The state of the "replica under test" improved e.g. it went from
		// "decreased" to "increased".
		// Stop testing (i.e. skipping) it and do not wait until the full test
		// duration is up.
		return true
	}

	if now.Before(m.replicaUnderTest.nextAllowedChange) {
		r.Reason = fmt.Sprintf("waiting %.1f more seconds to see if the lag has changed", m.replicaUnderTest.nextAllowedChange.Sub(now).Seconds())
		return false
	}

	return true
}

func (m *MaxReplicationLagModule) increaseRate(r *result, now time.Time, lagRecordNow replicationLagRecord) {
	m.markCurrentRateAsBadOrGood(r, now, stateIncreaseRate, unknown)

	oldRate := m.rate.Get()
	actualRate := m.actualRatesHistory.average(m.lastRateChange, now)
	// Do not increase the rate if we didn't see an actual rate that approached
	// the current max rate.
	// actualRate will be NaN if there were no observations in the history.
	if math.IsNaN(actualRate) ||
		actualRate < float64(oldRate)*m.config.MaxRateApproachThreshold {
		r.RateChange = unchangedRate
		r.OldRate = oldRate
		r.NewRate = oldRate
		r.Reason = fmt.Sprintf("Skipping periodic increase of the max rate (%v) since the actual average rate (%v) did not approach it.", oldRate, actualRate)
		r.CurrentRate = int64(actualRate)
		return
	}

	// Calculate new rate based on the previous (preferably highest good) rate.
	highestGood := m.memory.highestGood()
	previousRateSource := "highest known good rate"
	previousRate := float64(highestGood)
	if previousRate == 0.0 {
		// No known high good rate. Use the actual value instead.
		// (It might be lower because the system was slower or the throttler rate was
		// set by a different module and not us.)
		previousRateSource = "previous actual rate"
		previousRate = actualRate
	}

	// a) Increase rate by MaxIncrease.
	increaseReason := fmt.Sprintf("a max increase of %.1f%%", m.config.MaxIncrease*100)
	rate := previousRate * (1 + m.config.MaxIncrease)

	// b) Make the increase less aggressive if it goes above the bad rate.
	lowestBad := float64(m.memory.lowestBad())
	if lowestBad != 0 {
		if rate > lowestBad {
			// New rate will be the middle value of [previous rate, lowest bad rate].
			rate = previousRate + (lowestBad-previousRate)/2
			increaseReason += fmt.Sprintf(" (but limited to the middle value in the range [previous rate, lowest bad rate]: [%.0f, %.0f])", previousRate, lowestBad)
		}
	}
	// c) Always make minimum progress compared to oldRate.
	// Necessary for the following cases:
	// - MaxIncrease is too low and the rate might not increase
	// - after the new rate was limited by the bad rate, we got the old rate
	//   (In this case we might slightly go above the bad rate which we accept.)
	if int64(rate) <= oldRate {
		rate = float64(oldRate) + memoryGranularity
		increaseReason += fmt.Sprintf(" (minimum progress by %v)", memoryGranularity)
		previousRateSource = "previous set rate"
		previousRate = float64(oldRate)
	}

	increase := (rate - previousRate) / previousRate
	minTestDuration := m.minTestDurationUntilNextIncrease(increase)
	reason := fmt.Sprintf("periodic increase of the %v from %d to %d (by %.1f%%) based on %v to find out the maximum - next allowed increase in %.0f seconds",
		previousRateSource, int64(previousRate), int64(rate), increase*100, increaseReason, minTestDuration.Seconds())
	m.updateRate(r, stateIncreaseRate, int64(rate), reason, now, lagRecordNow, minTestDuration)
}

func (m *MaxReplicationLagModule) minTestDurationUntilNextIncrease(increase float64) time.Duration {
	minDuration := m.config.MinDurationBetweenIncreases()
	// We may have to wait longer than the configured minimum duration
	// until we see an effect of the increase.
	// Example: If the increase was fully over the capacity, it will take
	// 1 / increase seconds until the replication lag goes up by 1 second.
	// E.g. if the system was already at its maximum capacity (e.g. 1k QPS) and
	// we increase the rate by 5% to 1050 QPS, it will take 20 seconds until
	// 1000 extra queries are buffered and the lag increases by 1 second.
	// On top of that, add 2 extra seconds to account for a delayed propagation
	// of the data (because the throttler takes over the updated rate only every
	// second and it publishes its rate history only after a second is over).
	// TODO(mberlin): Instead of adding 2 seconds, should we wait for twice the
	// calculated time instead?
	minPropagationTime := time.Duration(1.0/increase+2) * time.Second
	if minPropagationTime > minDuration {
		minDuration = minPropagationTime
	}
	if minDuration > m.config.MaxDurationBetweenIncreases() {
		// Cap the wait to a reasonable amount of time (very small increases may
		// result in a 20 minute wait otherwise.)
		minDuration = m.config.MaxDurationBetweenIncreases()
	}
	return minDuration
}

func (m *MaxReplicationLagModule) decreaseAndGuessRate(r *result, now time.Time, lagRecordNow replicationLagRecord) {
	// Guess replication rate based on the difference in the replication lag of this
	// particular replica.
	lagRecordBefore := m.lagCache(lagRecordNow).atOrAfter(discovery.TabletToMapKey(lagRecordNow.Tablet), m.lastRateChange)
	if lagRecordBefore.isZero() {
		// We should see at least "lagRecordNow" here because we did just insert it
		// in processRecord().
		panic(fmt.Sprintf("BUG: replicationLagCache did not return the lagRecord for current replica: %v or a previous record of it. lastRateChange: %v replicationLagCache size: %v entries: %v", lagRecordNow, m.lastRateChange, len(m.lagCache(lagRecordNow).entries), m.lagCache(lagRecordNow).entries))
	}
	// Store the record in the result.
	r.LagRecordBefore = lagRecordBefore
	if lagRecordBefore.time == lagRecordNow.time {
		// No lag record for this replica in the time span
		// [last rate change, current lag record).
		// Without it we won't be able to guess the replication rate.
		// We err on the side of caution and reduce the rate by half the emergency
		// decrease percentage.
		decreaseReason := fmt.Sprintf("no previous lag record for this replica available since the last rate change (%.1f seconds ago)", now.Sub(m.lastRateChange).Seconds())
		m.decreaseRateByPercentage(r, now, lagRecordNow, stateDecreaseAndGuessRate, m.config.EmergencyDecrease/2, decreaseReason)
		return
	}

	// Analyze if the past rate was good or bad.
	lagBefore := lagRecordBefore.lag()
	lagNow := lagRecordNow.lag()
	replicationLagChange := less
	// Note that we consider lag changes of 1 second as equal as well because
	// they might be a rounding error in MySQL due to using timestamps at
	// second granularity.
	if lagNow == lagBefore || math.Abs(float64(lagNow-lagBefore)) == 1 {
		replicationLagChange = equal
	} else if lagNow > lagBefore {
		replicationLagChange = greater
	}
	m.markCurrentRateAsBadOrGood(r, now, stateDecreaseAndGuessRate, replicationLagChange)

	if replicationLagChange == equal {
		// The replication lag did not change. Keep going at the current rate.
		r.Reason = "did not decrease the rate because the lag did not change (assuming a 1s error margin)"
		return
	}

	// Find out the average rate (per second) at which we inserted data
	// at the primary during the observed timespan.
	from := lagRecordBefore.time
	to := lagRecordNow.time
	avgPrimaryRate := m.actualRatesHistory.average(from, to)
	if math.IsNaN(avgPrimaryRate) {
		// NaN (0.0/0.0) occurs when no observations were in the timespan.
		// Wait for more rate observations.
		r.Reason = fmt.Sprintf("did not decrease the rate because the throttler has not recorded its historic rates in the range [%v , %v]", from.Format("15:04:05"), to.Format("15:04:05"))
		return
	}

	// Sanity check and correct the data points if necessary.
	d := lagRecordNow.time.Sub(lagRecordBefore.time)
	lagDifference := time.Duration(lagRecordNow.lag()-lagRecordBefore.lag()) * time.Second
	if lagDifference > d {
		log.Errorf("Replication lag increase is higher than the elapsed time: %v > %v. This should not happen. Replication Lag Data points: Before: %+v Now: %+v", lagDifference, d, lagRecordBefore, lagRecordNow)
		d = lagDifference
	}

	// Guess the replica capacity based on the replication lag change.
	rate, reason := m.guessReplicationRate(r, avgPrimaryRate, lagBefore, lagNow, lagDifference, d)

	m.updateRate(r, stateDecreaseAndGuessRate, rate, reason, now, lagRecordNow, m.config.MinDurationBetweenDecreases())
}

// guessReplicationRate guesses the actual replication rate based on the
// observed change in replication lag and derives a new throttler rate which
// also accounts for the guessed backlog.
// Note that "lagDifference" can be positive (lag increased) or negative (lag
// decreased).
func (m *MaxReplicationLagModule) guessReplicationRate(r *result, avgPrimaryRate float64, lagBefore, lagNow int64, lagDifference, d time.Duration) (int64, string) {
	// avgReplicationRate is the average rate (per second) at which the replica
	// applied transactions from the replication stream. We infer the value
	// from the relative change in the replication lag.
	avgReplicationRate := avgPrimaryRate * (d - lagDifference).Seconds() / d.Seconds()
	if avgReplicationRate <= 0 {
		log.Warningf("guessed Replication rate was <= 0 (%v). Primary rate: %v d: %.1f lag difference: %.1f", avgReplicationRate, avgPrimaryRate, d.Seconds(), lagDifference.Seconds())
		avgReplicationRate = 1
	}
	r.PrimaryRate = int64(avgPrimaryRate)
	r.GuessedReplicationRate = int64(avgReplicationRate)

	oldRequestsBehind := 0.0
	// If the old lag was > 0s, the replica needs to catch up on that as well.
	if lagNow > lagBefore {
		oldRequestsBehind = avgReplicationRate * float64(lagBefore)
	}
	newRequestsBehind := 0.0
	// If the lag increased (i.e. replication rate was slower), the replica must make up
	// for the difference in the future.
	if avgReplicationRate < avgPrimaryRate {
		newRequestsBehind = (avgPrimaryRate - avgReplicationRate) * d.Seconds()
	}
	requestsBehind := oldRequestsBehind + newRequestsBehind
	r.GuessedReplicationBacklogOld = int(oldRequestsBehind)
	r.GuessedReplicationBacklogNew = int(newRequestsBehind)

	newRate := avgReplicationRate
	// Reduce the new rate such that it has time to catch up the requests it's
	// behind within the next interval.
	futureRequests := newRate * m.config.SpreadBacklogAcross().Seconds()
	newRate *= (futureRequests - requestsBehind) / futureRequests
	var reason string
	if newRate < 1 {
		// Backlog is too high. Reduce rate to 1 request/second.
		// TODO(mberlin): Make this a constant.
		newRate = 1
		reason = fmt.Sprintf("based on the guessed replication rate of: %v the replica won't be able to process the guessed backlog of %d requests within the next %.f seconds", int64(avgReplicationRate), int64(requestsBehind), m.config.SpreadBacklogAcross().Seconds())
	} else {
		reason = fmt.Sprintf("new rate is %d lower than the guessed replication rate to account for a guessed backlog of %d requests over %.f seconds", int64(avgReplicationRate-newRate), int64(requestsBehind), m.config.SpreadBacklogAcross().Seconds())
	}

	return int64(newRate), reason
}

func (m *MaxReplicationLagModule) emergency(r *result, now time.Time, lagRecordNow replicationLagRecord) {
	m.markCurrentRateAsBadOrGood(r, now, stateEmergency, unknown)

	decreaseReason := fmt.Sprintf("replication lag went beyond max: %d > %d", lagRecordNow.lag(), m.config.MaxReplicationLagSec)
	m.decreaseRateByPercentage(r, now, lagRecordNow, stateEmergency, m.config.EmergencyDecrease, decreaseReason)
}

func (m *MaxReplicationLagModule) decreaseRateByPercentage(r *result, now time.Time, lagRecordNow replicationLagRecord, newState state, decrease float64, decreaseReason string) {
	oldRate := m.rate.Get()
	rate := int64(float64(oldRate) - float64(oldRate)*decrease)
	if rate == 0 {
		// Never fully stop throttling.
		rate = 1
	}

	reason := fmt.Sprintf("%v: reducing previous rate of %d by %.f%% to: %v", decreaseReason, oldRate, decrease*100, rate)
	m.updateRate(r, newState, rate, reason, now, lagRecordNow, m.config.MinDurationBetweenDecreases())
}

func (m *MaxReplicationLagModule) updateRate(r *result, newState state, rate int64, reason string, now time.Time, lagRecordNow replicationLagRecord, testDuration time.Duration) {
	oldRate := m.rate.Get()

	m.currentState = newState

	// Update result with the new state.
	r.NewState = newState
	r.NewRate = rate
	r.Reason = reason
	if rate > oldRate {
		r.RateChange = increasedRate
	} else if rate < oldRate {
		r.RateChange = decreasedRate
	}

	m.lastRateChange = now
	m.replicaUnderTest = &replicaUnderTest{discovery.TabletToMapKey(lagRecordNow.Tablet), topoproto.TabletAliasString(lagRecordNow.Tablet.Alias), lagRecordNow.Target.TabletType, newState, now.Add(testDuration)}

	if rate == oldRate {
		return
	}
	m.rate.Set(rate)
	// Notify the throttler that we updated our max rate.
	m.rateUpdateChan <- struct{}{}
}

// markCurrentRateAsBadOrGood determines the actual rate between the last rate
// change and "now" and determines if that rate was bad or good.
func (m *MaxReplicationLagModule) markCurrentRateAsBadOrGood(r *result, now time.Time, newState state, replicationLagChange replicationLagChange) {
	if m.lastRateChange.IsZero() {
		// Module was just started. We don't have any data points yet.
		r.GoodOrBad = ignoredRate
		r.MemorySkipReason = "rate was never changed before (initial start)"
		return
	}

	// Use the actual rate instead of the set rate.
	// (It might be lower because the system was slower or the throttler rate was
	// set by a different module and not us.)
	rate := m.actualRatesHistory.average(m.lastRateChange, now)
	if math.IsNaN(rate) {
		// NaN (0.0/0.0) occurs when no records were in the timespan.
		// Wait for more records.
		r.GoodOrBad = ignoredRate
		r.MemorySkipReason = "cannot determine actual rate: no records in [lastRateChange, now]"
		return
	}

	rateIsGood := false

	switch m.currentState {
	case stateIncreaseRate:
		switch newState {
		case stateIncreaseRate:
			rateIsGood = true
		case stateDecreaseAndGuessRate:
			rateIsGood = false
		case stateEmergency:
			rateIsGood = false
		}
	case stateDecreaseAndGuessRate:
		switch newState {
		case stateIncreaseRate:
			rateIsGood = true
		case stateDecreaseAndGuessRate:
			switch replicationLagChange {
			case unknown:
				return
			case less:
				rateIsGood = true
			case equal:
				// Replication lag kept constant. Impossible to judge if the rate is good or bad.
				return
			case greater:
				rateIsGood = false
			}
		case stateEmergency:
			rateIsGood = false
		}
	case stateEmergency:
		// Rate changes initiated during an "emergency" phase provide no meaningful data point.
		r.MemorySkipReason = "not marking a rate as good or bad while in the emergency state"
		m.memory.touchBadRateAge(now)
		return
	}

	r.CurrentRate = int64(rate)
	if rateIsGood {
		if err := m.memory.markGood(int64(rate)); err == nil {
			r.GoodOrBad = goodRate
		} else {
			r.MemorySkipReason = err.Error()
		}
	} else {
		if err := m.memory.markBad(int64(rate), now); err == nil {
			r.GoodOrBad = badRate
		} else {
			r.MemorySkipReason = err.Error()
		}
	}
}

func (m *MaxReplicationLagModule) log() []result {
	return m.results.latestValues()
}