vitess.io/vitess@v0.16.2/go/vt/throttler/max_replication_lag_module.go

/*
Copyright 2019 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package throttler

import (
	"fmt"
	"math"
	"sync"
	"time"

	"google.golang.org/protobuf/proto"

	"vitess.io/vitess/go/sync2"
	"vitess.io/vitess/go/vt/discovery"
	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/topo/topoproto"

	throttlerdatapb "vitess.io/vitess/go/vt/proto/throttlerdata"
	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)

type state string

const (
	stateIncreaseRate         state = "I"
	stateDecreaseAndGuessRate state = "D"
	stateEmergency            state = "E"
)

type replicationLagChange int

const (
	unknown replicationLagChange = iota
	less
	equal
	greater
)

// replicaUnderTest refers to the replica to which we're currently "locked in"
// i.e. we'll ignore lag records with lower lag from other replicas while we're
// waiting for the next record of this replica under test.
type replicaUnderTest struct {
	// key holds the key value for the replica.
	key        string
	alias      string
	tabletType topodatapb.TabletType
	// state is the "state under test" which triggered the rate change.
	state             state
	nextAllowedChange time.Time
}

// memoryGranularity is the granularity at which values are rounded down in the
// memory instance. This is done to bucket similar rates (see also memory.go).
const memoryGranularity = 5

// MaxReplicationLagModule calculates the maximum rate based on observed
// replication lag and throttler rate changes.
// It implements the Module interface.
// Unless specified, the fields below are not guarded by a Mutex because they
// are only accessed within the Go routine running ProcessRecords().
type MaxReplicationLagModule struct {
	// config holds all parameters for this module.
	// It is only accessed by the Go routine which runs ProcessRecords() and does
	// not require locking.
	config MaxReplicationLagModuleConfig

	// initialMaxReplicationLagSec is the initial value of
	// config.MaxReplicationLagSec with which the module was started. We remember
	// it in case we need to reset the configuration.
	initialMaxReplicationLagSec int64

	// mutableConfigMu guards all fields in the group below.
	mutableConfigMu sync.Mutex
	// mutableConfig is the mutable copy of "config" which is currently used by
	// the module. By modifying "mutableConfig" and setting "applyMutableConfig"
	// to true, the next ProcessRecords() execution will copy "mutableConfig"
	// to "config" and become effective in the module.
	mutableConfig MaxReplicationLagModuleConfig
	// applyMutableConfig is set to true when ProcessRecords() should copy over
	// "mutableConfig" to "config".
	applyMutableConfig bool

	// rate is the rate calculated for the throttler.
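	// It is stored in an atomic value so that MaxRate() can read it
	// concurrently with updates made by ProcessRecords().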
	rate         sync2.AtomicInt64
	currentState state
	// lastRateChange is the time when rate was adjusted last.
	lastRateChange             time.Time
	replicaUnderTest           *replicaUnderTest
	nextAllowedChangeAfterInit time.Time

	actualRatesHistory *aggregatedIntervalHistory
	// replicaLagCache stores the past lag records for all REPLICA tablets.
	replicaLagCache *replicationLagCache
	// rdonlyLagCache stores the past lag records for all RDONLY tablets.
	rdonlyLagCache *replicationLagCache
	// memory tracks known good and bad throttler rates.
	memory *memory

	// rateUpdateChan is the notification channel to tell the throttler when our
	// max rate calculation has changed. The field is immutable (set in Start().)
	rateUpdateChan chan<- struct{}

	// lagRecords buffers the replication lag records received by the HealthCheck
	// subscriber. ProcessRecords() will process them.
	lagRecords chan replicationLagRecord
	wg         sync.WaitGroup

	// results caches the results of the latest processed replication lag records.
	results *resultRing
}

// NewMaxReplicationLagModule will create a new module instance and set the
// initial max replication lag limit to config.MaxReplicationLagSec.
func NewMaxReplicationLagModule(config MaxReplicationLagModuleConfig, actualRatesHistory *aggregatedIntervalHistory, nowFunc func() time.Time) (*MaxReplicationLagModule, error) {
	if err := config.Verify(); err != nil {
		return nil, fmt.Errorf("invalid NewMaxReplicationLagModuleConfig: %v", err)
	}
	rate := int64(ReplicationLagModuleDisabled)
	if config.MaxReplicationLagSec != ReplicationLagModuleDisabled {
		rate = config.InitialRate
	}

	m := &MaxReplicationLagModule{
		initialMaxReplicationLagSec: config.MaxReplicationLagSec,
		// Register "config" for a future config update.
		mutableConfig:      config,
		applyMutableConfig: true,
		// Always start off with a non-zero rate because zero means all requests
		// get throttled.
		rate:           sync2.NewAtomicInt64(rate),
		currentState:   stateIncreaseRate,
		lastRateChange: nowFunc(),
		memory:         newMemory(memoryGranularity, config.AgeBadRateAfter(), config.BadRateIncrease),
		lagRecords:     make(chan replicationLagRecord, 10),
		// Prevent an immediate increase of the initial rate.
		nextAllowedChangeAfterInit: nowFunc().Add(config.MaxDurationBetweenIncreases()),
		actualRatesHistory:         actualRatesHistory,
		replicaLagCache:            newReplicationLagCache(1000),
		rdonlyLagCache:             newReplicationLagCache(1000),
		results:                    newResultRing(1000),
	}

	// Enforce a config update.
	m.applyLatestConfig()

	return m, nil
}

// Start launches a Go routine which reacts to replication lag records.
// It implements the Module interface.
func (m *MaxReplicationLagModule) Start(rateUpdateChan chan<- struct{}) {
	m.rateUpdateChan = rateUpdateChan
	m.wg.Add(1)
	go m.ProcessRecords()
}

// Stop blocks until the module's Go routine is stopped.
// It implements the Module interface.
func (m *MaxReplicationLagModule) Stop() {
	close(m.lagRecords)
	m.wg.Wait()
}

// MaxRate returns the current maximum allowed rate.
// It implements the Module interface.
func (m *MaxReplicationLagModule) MaxRate() int64 {
	return m.rate.Get()
}

// applyLatestConfig checks if "mutableConfig" should be applied as the new
// "config" and does so when necessary.
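// Pending changes are queued by updateConfiguration() and resetConfiguration()
// and take effect when the next lag record is processed.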
func (m *MaxReplicationLagModule) applyLatestConfig() {
	var config MaxReplicationLagModuleConfig
	applyConfig := false

	m.mutableConfigMu.Lock()
	if m.applyMutableConfig {
		config = m.mutableConfig
		applyConfig = true
		m.applyMutableConfig = false
	}
	m.mutableConfigMu.Unlock()

	// No locking required here because this method is only called from the same
	// Go routine as ProcessRecords() or the constructor.
	if applyConfig {
		m.config = config
		m.memory.updateAgingConfiguration(config.AgeBadRateAfter(), config.BadRateIncrease)
	}
}

func (m *MaxReplicationLagModule) getConfiguration() *throttlerdatapb.Configuration {
	m.mutableConfigMu.Lock()
	defer m.mutableConfigMu.Unlock()
	return proto.Clone(m.mutableConfig.Configuration).(*throttlerdatapb.Configuration)
}

func (m *MaxReplicationLagModule) updateConfiguration(configuration *throttlerdatapb.Configuration, copyZeroValues bool) error {
	m.mutableConfigMu.Lock()
	defer m.mutableConfigMu.Unlock()

	newConfig := m.mutableConfig

	if copyZeroValues {
		newConfig.Configuration = proto.Clone(configuration).(*throttlerdatapb.Configuration)
	} else {
		proto.Merge(newConfig.Configuration, configuration)
	}

	if err := newConfig.Verify(); err != nil {
		return err
	}
	m.mutableConfig = newConfig
	m.applyMutableConfig = true
	return nil
}

func (m *MaxReplicationLagModule) resetConfiguration() {
	m.mutableConfigMu.Lock()
	defer m.mutableConfigMu.Unlock()

	m.mutableConfig = NewMaxReplicationLagModuleConfig(m.initialMaxReplicationLagSec)
	m.applyMutableConfig = true
}

// RecordReplicationLag records the current replication lag for processing.
func (m *MaxReplicationLagModule) RecordReplicationLag(t time.Time, th *discovery.TabletHealth) {
	m.mutableConfigMu.Lock()
	if m.mutableConfig.MaxReplicationLagSec == ReplicationLagModuleDisabled {
		m.mutableConfigMu.Unlock()
		return
	}
	m.mutableConfigMu.Unlock()

	// Buffer data point for now to unblock the HealthCheck subscriber and process
	// it asynchronously in ProcessRecords().
	m.lagRecords <- replicationLagRecord{t, *th}
}

// ProcessRecords is the main loop, run in a separate Go routine, which
// reacts to any replication lag updates (recordings).
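// It returns once the lagRecords channel has been closed by Stop().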
func (m *MaxReplicationLagModule) ProcessRecords() {
	defer m.wg.Done()

	for lagRecord := range m.lagRecords {
		m.processRecord(lagRecord)
	}
}

func (m *MaxReplicationLagModule) processRecord(lagRecord replicationLagRecord) {
	m.applyLatestConfig()

	m.lagCache(lagRecord).add(lagRecord)

	m.lagCache(lagRecord).sortByLag(m.getNSlowestReplicasConfig(lagRecord), m.config.MaxReplicationLagSec+1)

	m.recalculateRate(lagRecord)
}

func (m *MaxReplicationLagModule) lagCache(lagRecord replicationLagRecord) *replicationLagCache {
	return m.lagCacheByType(lagRecord.Target.TabletType)
}

func (m *MaxReplicationLagModule) lagCacheByType(tabletType topodatapb.TabletType) *replicationLagCache {
	switch tabletType {
	case topodatapb.TabletType_REPLICA:
		return m.replicaLagCache
	case topodatapb.TabletType_RDONLY:
		return m.rdonlyLagCache
	default:
		panic(fmt.Sprintf("BUG: invalid TabletType forwarded: %v", tabletType))
	}
}

func (m *MaxReplicationLagModule) getNSlowestReplicasConfig(lagRecord replicationLagRecord) int {
	switch lagRecord.Target.TabletType {
	case topodatapb.TabletType_REPLICA:
		return int(m.config.IgnoreNSlowestReplicas)
	case topodatapb.TabletType_RDONLY:
		return int(m.config.IgnoreNSlowestRdonlys)
	default:
		panic(fmt.Sprintf("BUG: invalid TabletType forwarded: %v", lagRecord))
	}
}

func (m *MaxReplicationLagModule) recalculateRate(lagRecordNow replicationLagRecord) {
	if lagRecordNow.isZero() {
		panic("rate recalculation was triggered with a zero replication lag record")
	}
	now := lagRecordNow.time
	lagNow := lagRecordNow.lag()

	m.memory.ageBadRate(now)

	r := result{
		Now:            now,
		RateChange:     unchangedRate,
		lastRateChange: m.lastRateChange,
		OldState:       m.currentState,
		NewState:       m.currentState,
		OldRate:        m.rate.Get(),
		NewRate:        m.rate.Get(),
		LagRecordNow:   lagRecordNow,
	}
	if lagNow <= m.config.TargetReplicationLagSec {
		// Lag in range: [0, target]
		r.TestedState = stateIncreaseRate
	} else if lagNow <= m.config.MaxReplicationLagSec {
		// Lag in range: (target, max]
		r.TestedState = stateDecreaseAndGuessRate
	} else {
		// Lag in range: (max, infinite]
		r.TestedState = stateEmergency
	}

	// Declare new variables before we use "goto". Required by the Go compiler.
	var clear bool
	var clearReason string

	if m.lagCache(lagRecordNow).ignoreSlowReplica(discovery.TabletToMapKey(lagRecordNow.Tablet)) {
		r.Reason = fmt.Sprintf("skipping this replica because it's among the %d slowest %v tablets", m.getNSlowestReplicasConfig(lagRecordNow), lagRecordNow.Target.TabletType.String())
		goto logResult
	}

	clear, clearReason = m.clearReplicaUnderTest(now, r.TestedState, lagRecordNow)
	if clear {
		clearReason = fmt.Sprintf("; previous replica under test (%v) cleared because %v", m.replicaUnderTest.alias, clearReason)
		m.replicaUnderTest = nil
	}

	if r.TestedState == stateIncreaseRate && now.Before(m.nextAllowedChangeAfterInit) {
		r.Reason = fmt.Sprintf("waiting %.1f more seconds since init until we try the first increase", m.nextAllowedChangeAfterInit.Sub(now).Seconds())
		goto logResult
	}

	if !m.isReplicaUnderTest(&r, now, r.TestedState, lagRecordNow) {
		goto logResult
	}

	// Process the lag record and adjust the rate.
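	// At this point either no replica is under test or this record belongs to
	// the replica under test, so we act on it.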
	if m.replicaUnderTest != nil {
		// We're checking the same replica again. The old value is no longer needed.
		m.replicaUnderTest = nil
	}
	switch r.TestedState {
	case stateIncreaseRate:
		m.increaseRate(&r, now, lagRecordNow)
	case stateDecreaseAndGuessRate:
		m.decreaseAndGuessRate(&r, now, lagRecordNow)
	case stateEmergency:
		m.emergency(&r, now, lagRecordNow)
	default:
		panic(fmt.Sprintf("BUG: invalid state: %v", r.TestedState))
	}

logResult:
	r.HighestGood = m.memory.highestGood()
	r.LowestBad = m.memory.lowestBad()

	if clear {
		r.Reason += clearReason
	}

	log.Infof("%v", r)
	m.results.add(r)
}

// clearReplicaUnderTest returns true if the current "replica under test" should
// be cleared e.g. because the new lag record is more severe or we did not hear
// back from the replica under test for a while.
func (m *MaxReplicationLagModule) clearReplicaUnderTest(now time.Time, testedState state, lagRecordNow replicationLagRecord) (bool, string) {
	if m.replicaUnderTest == nil {
		return false, ""
	}

	if stateGreater(testedState, m.replicaUnderTest.state) {
		// Example: "decrease" > "increase" will return true.
		return true, fmt.Sprintf("the new state is more severe (%v -> %v)", m.replicaUnderTest.state, testedState)
	}

	// Verify that the current replica under test is not in an error state.
	lr := lagRecordNow
	if m.replicaUnderTest.key != discovery.TabletToMapKey(lr.Tablet) {
		lr = m.lagCacheByType(m.replicaUnderTest.tabletType).latest(m.replicaUnderTest.key)
	}
	if lr.isZero() {
		// Replica is no longer tracked by the cache i.e. may be offline.
		return true, "it is no longer actively tracked"
	}
	if lr.LastError != nil {
		// LastError is set i.e. HealthCheck module cannot connect and the cached
		// data for the replica might be outdated.
		return true, "it has LastError set i.e. is no longer correctly tracked"
	}

	if m.lagCacheByType(m.replicaUnderTest.tabletType).isIgnored(m.replicaUnderTest.key) {
		// "replica under test" has become a slow, ignored replica.
		return true, "it is ignored as a slow replica"
	}

	if now.After(m.replicaUnderTest.nextAllowedChange.Add(m.config.MaxDurationBetweenIncreases())) {
		// We haven't heard from the replica under test for too long. Assume it
		// timed out.
		return true, fmt.Sprintf("we didn't see a recent record from it within the last %.1f seconds", m.config.MaxDurationBetweenIncreases().Seconds())
	}

	return false, ""
}

// stateGreater returns true if a > b i.e. the state "a" is more severe than
// "b". For example, "decrease" > "increase" returns true.
func stateGreater(a, b state) bool {
	switch a {
	case stateIncreaseRate:
		return false
	case stateDecreaseAndGuessRate:
		return b == stateIncreaseRate
	case stateEmergency:
		return b == stateIncreaseRate || b == stateDecreaseAndGuessRate
	default:
		panic(fmt.Sprintf("BUG: cannot compare states: %v and %v", a, b))
	}
}

// isReplicaUnderTest returns true if the current replica ("lagRecordNow")
// should be processed i.e. no replica is under test or "lagRecordNow" belongs
// to the replica under test.
// Even if it's the same replica we may skip it and return false because
// we want to wait longer for the propagation of the current rate change.
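// If the record is skipped, the reason is stored in r.Reason.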
func (m *MaxReplicationLagModule) isReplicaUnderTest(r *result, now time.Time, testedState state, lagRecordNow replicationLagRecord) bool {
	if m.replicaUnderTest == nil {
		return true
	}

	if m.replicaUnderTest.key != discovery.TabletToMapKey(lagRecordNow.Tablet) {
		r.Reason = fmt.Sprintf("skipping this replica because we're waiting for the next lag record from the 'replica under test': %v", m.replicaUnderTest.alias)
		return false
	}

	if stateGreater(m.replicaUnderTest.state, testedState) {
		// The state of the "replica under test" improved e.g. it went from
		// "decreased" to "increased".
		// Stop testing (i.e. skipping) it and do not wait until the full test
		// duration is up.
		return true
	}

	if now.Before(m.replicaUnderTest.nextAllowedChange) {
		r.Reason = fmt.Sprintf("waiting %.1f more seconds to see if the lag has changed", m.replicaUnderTest.nextAllowedChange.Sub(now).Seconds())
		return false
	}

	return true
}

func (m *MaxReplicationLagModule) increaseRate(r *result, now time.Time, lagRecordNow replicationLagRecord) {
	m.markCurrentRateAsBadOrGood(r, now, stateIncreaseRate, unknown)

	oldRate := m.rate.Get()
	actualRate := m.actualRatesHistory.average(m.lastRateChange, now)
	// Do not increase the rate if we didn't see an actual rate that approached
	// the current max rate.
	// actualRate will be NaN if there were no observations in the history.
	if math.IsNaN(actualRate) ||
		actualRate < float64(oldRate)*m.config.MaxRateApproachThreshold {
		r.RateChange = unchangedRate
		r.OldRate = oldRate
		r.NewRate = oldRate
		r.Reason = fmt.Sprintf("Skipping periodic increase of the max rate (%v) since the actual average rate (%v) did not approach it.", oldRate, actualRate)
		r.CurrentRate = int64(actualRate)
		return
	}

	// Calculate new rate based on the previous (preferably highest good) rate.
	highestGood := m.memory.highestGood()
	previousRateSource := "highest known good rate"
	previousRate := float64(highestGood)
	if previousRate == 0.0 {
		// No known high good rate. Use the actual value instead.
		// (It might be lower because the system was slower or the throttler rate was
		// set by a different module and not us.)
		previousRateSource = "previous actual rate"
		previousRate = actualRate
	}

	// a) Increase rate by MaxIncrease.
	increaseReason := fmt.Sprintf("a max increase of %.1f%%", m.config.MaxIncrease*100)
	rate := previousRate * (1 + m.config.MaxIncrease)

	// b) Make the increase less aggressive if it goes above the bad rate.
	lowestBad := float64(m.memory.lowestBad())
	if lowestBad != 0 {
		if rate > lowestBad {
			// New rate will be the middle value of [previous rate, lowest bad rate].
			rate = previousRate + (lowestBad-previousRate)/2
			increaseReason += fmt.Sprintf(" (but limited to the middle value in the range [previous rate, lowest bad rate]: [%.0f, %.0f])", previousRate, lowestBad)
		}
	}
	// c) Always make minimum progress compared to oldRate.
	// Necessary for the following cases:
	// - MaxIncrease is too low and the rate might not increase
	// - after the new rate was limited by the bad rate, we got the old rate
	//   (In this case we might slightly go above the bad rate which we accept.)
	if int64(rate) <= oldRate {
		rate = float64(oldRate) + memoryGranularity
		increaseReason += fmt.Sprintf(" (minimum progress by %v)", memoryGranularity)
		previousRateSource = "previous set rate"
		previousRate = float64(oldRate)
	}

	increase := (rate - previousRate) / previousRate
	minTestDuration := m.minTestDurationUntilNextIncrease(increase)
	reason := fmt.Sprintf("periodic increase of the %v from %d to %d (by %.1f%%) based on %v to find out the maximum - next allowed increase in %.0f seconds",
		previousRateSource, int64(previousRate), int64(rate), increase*100, increaseReason, minTestDuration.Seconds())
	m.updateRate(r, stateIncreaseRate, int64(rate), reason, now, lagRecordNow, minTestDuration)
}

func (m *MaxReplicationLagModule) minTestDurationUntilNextIncrease(increase float64) time.Duration {
	minDuration := m.config.MinDurationBetweenIncreases()
	// We may have to wait longer than the configured minimum duration
	// until we see an effect of the increase.
	// Example: If the increase was fully over the capacity, it will take
	// 1 / increase seconds until the replication lag goes up by 1 second.
	// E.g. if the system was already at its maximum capacity (e.g. 1k QPS) and
	// we increase the rate by 5% to 1050 QPS, it will take 20 seconds until
	// 1000 extra queries are buffered and the lag increases by 1 second.
	// On top of that, add 2 extra seconds to account for a delayed propagation
	// of the data (because the throttler takes over the updated rate only every
	// second and it publishes its rate history only after a second is over).
	// TODO(mberlin): Instead of adding 2 seconds, should we wait for twice the
	// calculated time instead?
	minPropagationTime := time.Duration(1.0/increase+2) * time.Second
	if minPropagationTime > minDuration {
		minDuration = minPropagationTime
	}
	if minDuration > m.config.MaxDurationBetweenIncreases() {
		// Cap the wait to a reasonable amount of time (very small increases may
		// result in a 20 minute wait otherwise.)
		minDuration = m.config.MaxDurationBetweenIncreases()
	}
	return minDuration
}

func (m *MaxReplicationLagModule) decreaseAndGuessRate(r *result, now time.Time, lagRecordNow replicationLagRecord) {
	// Guess replication rate based on the difference in the replication lag of this
	// particular replica.
	lagRecordBefore := m.lagCache(lagRecordNow).atOrAfter(discovery.TabletToMapKey(lagRecordNow.Tablet), m.lastRateChange)
	if lagRecordBefore.isZero() {
		// We should see at least "lagRecordNow" here because we did just insert it
		// in processRecord().
		panic(fmt.Sprintf("BUG: replicationLagCache did not return the lagRecord for current replica: %v or a previous record of it. lastRateChange: %v replicationLagCache size: %v entries: %v", lagRecordNow, m.lastRateChange, len(m.lagCache(lagRecordNow).entries), m.lagCache(lagRecordNow).entries))
	}
	// Store the record in the result.
	r.LagRecordBefore = lagRecordBefore
	if lagRecordBefore.time == lagRecordNow.time {
		// No lag record for this replica in the time span
		// [last rate change, current lag record).
		// Without it we won't be able to guess the replication rate.
		// We err on the side of caution and reduce the rate by half the emergency
		// decrease percentage.
		decreaseReason := fmt.Sprintf("no previous lag record for this replica available since the last rate change (%.1f seconds ago)", now.Sub(m.lastRateChange).Seconds())
		m.decreaseRateByPercentage(r, now, lagRecordNow, stateDecreaseAndGuessRate, m.config.EmergencyDecrease/2, decreaseReason)
		return
	}

	// Analyze if the past rate was good or bad.
	lagBefore := lagRecordBefore.lag()
	lagNow := lagRecordNow.lag()
	replicationLagChange := less
	// Note that we consider lag changes of 1 second as equal as well because
	// they might be a rounding error in MySQL due to using timestamps at
	// second granularity.
	if lagNow == lagBefore || math.Abs(float64(lagNow-lagBefore)) == 1 {
		replicationLagChange = equal
	} else if lagNow > lagBefore {
		replicationLagChange = greater
	}
	m.markCurrentRateAsBadOrGood(r, now, stateDecreaseAndGuessRate, replicationLagChange)

	if replicationLagChange == equal {
		// The replication lag did not change. Keep going at the current rate.
		r.Reason = "did not decrease the rate because the lag did not change (assuming a 1s error margin)"
		return
	}

	// Find out the average rate (per second) at which we inserted data
	// at the primary during the observed timespan.
	from := lagRecordBefore.time
	to := lagRecordNow.time
	avgPrimaryRate := m.actualRatesHistory.average(from, to)
	if math.IsNaN(avgPrimaryRate) {
		// NaN (0.0/0.0) occurs when no observations were in the timespan.
		// Wait for more rate observations.
		r.Reason = fmt.Sprintf("did not decrease the rate because the throttler has not recorded its historic rates in the range [%v , %v]", from.Format("15:04:05"), to.Format("15:04:05"))
		return
	}

	// Sanity check and correct the data points if necessary.
	d := lagRecordNow.time.Sub(lagRecordBefore.time)
	lagDifference := time.Duration(lagRecordNow.lag()-lagRecordBefore.lag()) * time.Second
	if lagDifference > d {
		log.Errorf("Replication lag increase is higher than the elapsed time: %v > %v. This should not happen. Replication Lag Data points: Before: %+v Now: %+v", lagDifference, d, lagRecordBefore, lagRecordNow)
		d = lagDifference
	}

	// Guess the replica capacity based on the replication lag change.
	rate, reason := m.guessReplicationRate(r, avgPrimaryRate, lagBefore, lagNow, lagDifference, d)

	m.updateRate(r, stateDecreaseAndGuessRate, rate, reason, now, lagRecordNow, m.config.MinDurationBetweenDecreases())
}

// guessReplicationRate guesses the actual replication rate based on the
// observed change in replication lag and derives a new throttler rate which
// also accounts for the guessed backlog.
// Note that "lagDifference" can be positive (lag increased) or negative (lag
// decreased).
func (m *MaxReplicationLagModule) guessReplicationRate(r *result, avgPrimaryRate float64, lagBefore, lagNow int64, lagDifference, d time.Duration) (int64, string) {
	// avgReplicationRate is the average rate (per second) at which the replica
	// applied transactions from the replication stream. We infer the value
	// from the relative change in the replication lag.
	avgReplicationRate := avgPrimaryRate * (d - lagDifference).Seconds() / d.Seconds()
	if avgReplicationRate <= 0 {
		log.Warningf("guessed Replication rate was <= 0 (%v). Primary rate: %v d: %.1f lag difference: %.1f", avgReplicationRate, avgPrimaryRate, d.Seconds(), lagDifference.Seconds())
		avgReplicationRate = 1
	}
	r.PrimaryRate = int64(avgPrimaryRate)
	r.GuessedReplicationRate = int64(avgReplicationRate)

	oldRequestsBehind := 0.0
	// If the old lag was > 0s, the replica needs to catch up on that as well.
	if lagNow > lagBefore {
		oldRequestsBehind = avgReplicationRate * float64(lagBefore)
	}
	newRequestsBehind := 0.0
	// If the lag increased (i.e. replication rate was slower), the replica must make up
	// for the difference in the future.
	if avgReplicationRate < avgPrimaryRate {
		newRequestsBehind = (avgPrimaryRate - avgReplicationRate) * d.Seconds()
	}
	requestsBehind := oldRequestsBehind + newRequestsBehind
	r.GuessedReplicationBacklogOld = int(oldRequestsBehind)
	r.GuessedReplicationBacklogNew = int(newRequestsBehind)

	newRate := avgReplicationRate
	// Reduce the new rate such that it has time to catch up the requests it's
	// behind within the next interval.
	futureRequests := newRate * m.config.SpreadBacklogAcross().Seconds()
	newRate *= (futureRequests - requestsBehind) / futureRequests
	var reason string
	if newRate < 1 {
		// Backlog is too high. Reduce rate to 1 request/second.
		// TODO(mberlin): Make this a constant.
		newRate = 1
		reason = fmt.Sprintf("based on the guessed replication rate of: %v the replica won't be able to process the guessed backlog of %d requests within the next %.f seconds", int64(avgReplicationRate), int64(requestsBehind), m.config.SpreadBacklogAcross().Seconds())
	} else {
		reason = fmt.Sprintf("new rate is %d lower than the guessed replication rate to account for a guessed backlog of %d requests over %.f seconds", int64(avgReplicationRate-newRate), int64(requestsBehind), m.config.SpreadBacklogAcross().Seconds())
	}

	return int64(newRate), reason
}

func (m *MaxReplicationLagModule) emergency(r *result, now time.Time, lagRecordNow replicationLagRecord) {
	m.markCurrentRateAsBadOrGood(r, now, stateEmergency, unknown)

	decreaseReason := fmt.Sprintf("replication lag went beyond max: %d > %d", lagRecordNow.lag(), m.config.MaxReplicationLagSec)
	m.decreaseRateByPercentage(r, now, lagRecordNow, stateEmergency, m.config.EmergencyDecrease, decreaseReason)
}

func (m *MaxReplicationLagModule) decreaseRateByPercentage(r *result, now time.Time, lagRecordNow replicationLagRecord, newState state, decrease float64, decreaseReason string) {
	oldRate := m.rate.Get()
	rate := int64(float64(oldRate) - float64(oldRate)*decrease)
	if rate == 0 {
		// Never fully stop throttling.
		rate = 1
	}

	reason := fmt.Sprintf("%v: reducing previous rate of %d by %.f%% to: %v", decreaseReason, oldRate, decrease*100, rate)
	m.updateRate(r, newState, rate, reason, now, lagRecordNow, m.config.MinDurationBetweenDecreases())
}

func (m *MaxReplicationLagModule) updateRate(r *result, newState state, rate int64, reason string, now time.Time, lagRecordNow replicationLagRecord, testDuration time.Duration) {
	oldRate := m.rate.Get()

	m.currentState = newState

	// Update result with the new state.
	r.NewState = newState
	r.NewRate = rate
	r.Reason = reason
	if rate > oldRate {
		r.RateChange = increasedRate
	} else if rate < oldRate {
		r.RateChange = decreasedRate
	}

	m.lastRateChange = now
	m.replicaUnderTest = &replicaUnderTest{discovery.TabletToMapKey(lagRecordNow.Tablet), topoproto.TabletAliasString(lagRecordNow.Tablet.Alias), lagRecordNow.Target.TabletType, newState, now.Add(testDuration)}

	if rate == oldRate {
		return
	}
	m.rate.Set(rate)
	// Notify the throttler that we updated our max rate.
	m.rateUpdateChan <- struct{}{}
}

// markCurrentRateAsBadOrGood determines the actual rate between the last rate
// change and "now" and determines if that rate was bad or good.
func (m *MaxReplicationLagModule) markCurrentRateAsBadOrGood(r *result, now time.Time, newState state, replicationLagChange replicationLagChange) {
	if m.lastRateChange.IsZero() {
		// Module was just started. We don't have any data points yet.
		r.GoodOrBad = ignoredRate
		r.MemorySkipReason = "rate was never changed before (initial start)"
		return
	}

	// Use the actual rate instead of the set rate.
	// (It might be lower because the system was slower or the throttler rate was
	// set by a different module and not us.)
	rate := m.actualRatesHistory.average(m.lastRateChange, now)
	if math.IsNaN(rate) {
		// NaN (0.0/0.0) occurs when no records were in the timespan.
		// Wait for more records.
		r.GoodOrBad = ignoredRate
		r.MemorySkipReason = "cannot determine actual rate: no records in [lastRateChange, now]"
		return
	}

	rateIsGood := false

	switch m.currentState {
	case stateIncreaseRate:
		switch newState {
		case stateIncreaseRate:
			rateIsGood = true
		case stateDecreaseAndGuessRate:
			rateIsGood = false
		case stateEmergency:
			rateIsGood = false
		}
	case stateDecreaseAndGuessRate:
		switch newState {
		case stateIncreaseRate:
			rateIsGood = true
		case stateDecreaseAndGuessRate:
			switch replicationLagChange {
			case unknown:
				return
			case less:
				rateIsGood = true
			case equal:
				// Replication lag kept constant. Impossible to judge if the rate is good or bad.
				return
			case greater:
				rateIsGood = false
			}
		case stateEmergency:
			rateIsGood = false
		}
	case stateEmergency:
		// Rate changes initiated during an "emergency" phase provide no meaningful data point.
		r.MemorySkipReason = "not marking a rate as good or bad while in the emergency state"
		m.memory.touchBadRateAge(now)
		return
	}

	r.CurrentRate = int64(rate)
	if rateIsGood {
		if err := m.memory.markGood(int64(rate)); err == nil {
			r.GoodOrBad = goodRate
		} else {
			r.MemorySkipReason = err.Error()
		}
	} else {
		if err := m.memory.markBad(int64(rate), now); err == nil {
			r.GoodOrBad = badRate
		} else {
			r.MemorySkipReason = err.Error()
		}
	}
}

func (m *MaxReplicationLagModule) log() []result {
	return m.results.latestValues()
}