vitess.io/vitess@v0.16.2/go/vt/throttler/max_replication_lag_module_test.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package throttler 18 19 import ( 20 "errors" 21 "fmt" 22 "testing" 23 "time" 24 25 "vitess.io/vitess/go/vt/log" 26 27 "vitess.io/vitess/go/vt/discovery" 28 29 querypb "vitess.io/vitess/go/vt/proto/query" 30 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 31 ) 32 33 // The tests below tests the heuristic of the MaxReplicationLagModule. 34 // Depending on a replica's replication lag and the historic throttler rate, 35 // the module does increase or decrease the throttler rate. 36 37 // Most of the tests assume that there are two REPLICA tablets r1 and r2 which 38 // both broadcast their replication lag every 20s. 39 // r1 starts to broadcast at 0s, r2 at 10s. 40 // Additionally, there are two RDONLY tablets rdonly1 and rdonly2 which are only 41 // used by the "IgnoreNSlowestReplicas" tests. 42 const ( 43 // Automatic enumeration starts at 0. But there's no r0. Ignore it. 44 _ = iota 45 r1 46 r2 47 rdonly1 48 rdonly2 49 ) 50 51 type testFixture struct { 52 m *MaxReplicationLagModule 53 ratesHistory *fakeRatesHistory 54 } 55 56 func newTestFixtureWithMaxReplicationLag(maxReplicationLag int64) (*testFixture, error) { 57 config := NewMaxReplicationLagModuleConfig(maxReplicationLag) 58 return newTestFixture(config) 59 } 60 61 func newTestFixture(config MaxReplicationLagModuleConfig) (*testFixture, error) { 62 ratesHistory := newFakeRatesHistory() 63 fc := &fakeClock{} 64 // Do not start at 0*time.Second because than the code cannot distinguish 65 // between a legimate value and a zero time.Time value. 66 fc.setNow(1 * time.Second) 67 m, err := NewMaxReplicationLagModule(config, ratesHistory.aggregatedIntervalHistory, fc.now) 68 if err != nil { 69 return nil, err 70 } 71 // Updates for the throttler go into a big channel and will be ignored. 72 m.rateUpdateChan = make(chan<- struct{}, 1000) 73 74 return &testFixture{ 75 m: m, 76 ratesHistory: ratesHistory, 77 }, nil 78 } 79 80 // process does the same thing as MaxReplicationLagModule.ProcessRecords() does 81 // for a new "lagRecord". 82 func (tf *testFixture) process(lagRecord replicationLagRecord) { 83 tf.m.processRecord(lagRecord) 84 } 85 86 func (tf *testFixture) checkState(state state, rate int64, lastRateChange time.Time) error { 87 if got, want := tf.m.currentState, state; got != want { 88 return fmt.Errorf("module in wrong state. got = %v, want = %v", got, want) 89 } 90 if got, want := tf.m.MaxRate(), rate; got != want { 91 return fmt.Errorf("module has wrong MaxRate(). got = %v, want = %v", got, want) 92 } 93 if got, want := tf.m.lastRateChange, lastRateChange; got != want { 94 return fmt.Errorf("module has wrong lastRateChange time. got = %v, want = %v", got, want) 95 } 96 return nil 97 } 98 99 func TestMaxReplicationLagModule_RateNotZeroWhenDisabled(t *testing.T) { 100 tf, err := newTestFixtureWithMaxReplicationLag(ReplicationLagModuleDisabled) 101 if err != nil { 102 t.Fatal(err) 103 } 104 105 // Initial rate must not be zero. It's ReplicationLagModuleDisabled instead. 106 if err := tf.checkState(stateIncreaseRate, ReplicationLagModuleDisabled, sinceZero(1*time.Second)); err != nil { 107 t.Fatal(err) 108 } 109 } 110 111 func TestMaxReplicationLagModule_InitialStateAndWait(t *testing.T) { 112 config := NewMaxReplicationLagModuleConfig(5) 113 // Overwrite the default config to make sure we test a non-default value. 114 config.InitialRate = 123 115 config.MaxDurationBetweenIncreasesSec = 23 116 tf, err := newTestFixture(config) 117 if err != nil { 118 t.Fatal(err) 119 } 120 121 // Initial rate must be config.InitialRate. 122 if err := tf.checkState(stateIncreaseRate, config.InitialRate, sinceZero(1*time.Second)); err != nil { 123 t.Fatal(err) 124 } 125 // After startup, the next increment won't happen until 126 // config.MaxDurationBetweenIncreasesSec elapsed. 127 if got, want := tf.m.nextAllowedChangeAfterInit, sinceZero(config.MaxDurationBetweenIncreases()+1*time.Second); got != want { 128 t.Fatalf("got = %v, want = %v", got, want) 129 } 130 } 131 132 // TestMaxReplicationLagModule_Increase tests only the continuous increase of the 133 // rate and assumes that we are well below the replica capacity. 134 func TestMaxReplicationLagModule_Increase(t *testing.T) { 135 tf, err := newTestFixtureWithMaxReplicationLag(5) 136 if err != nil { 137 t.Fatal(err) 138 } 139 140 // We start at config.InitialRate. 141 if err := tf.checkState(stateIncreaseRate, 100, sinceZero(1*time.Second)); err != nil { 142 t.Fatal(err) 143 } 144 // After the initial wait period of 62s 145 // (config.MaxDurationBetweenIncreasesSec), regular increments start. 146 147 // r2 @ 70s, 0s lag 148 // NOTE: We don't add a record for 70s itself because the throttler only 149 // publishes it at the "end" of that second i.e. when the time at 71s started. 150 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 151 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 152 // Rate was increased to 200 based on actual rate of 100 within [0s, 69s]. 153 // r2 becomes the "replica under test". 154 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 155 t.Fatal(err) 156 } 157 // We have to wait at least config.MinDurationBetweenIncreasesSec (40s) before 158 // the next increase. 159 if got, want := tf.m.replicaUnderTest.nextAllowedChange, sinceZero(70*time.Second).Add(tf.m.config.MinDurationBetweenIncreases()); got != want { 160 t.Fatalf("got = %v, want = %v", got, want) 161 } 162 // r2 @ 75s, 0s lag 163 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 164 tf.ratesHistory.add(sinceZero(74*time.Second), 200) 165 tf.process(lagRecord(sinceZero(75*time.Second), r2, 0)) 166 // Lag record was ignored because it's within the wait period. 167 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 168 t.Fatal(err) 169 } 170 171 // r1 @ 80s, 0s lag 172 tf.ratesHistory.add(sinceZero(79*time.Second), 200) 173 tf.process(lagRecord(sinceZero(80*time.Second), r1, 0)) 174 // The r1 lag update was ignored because an increment "under test" is always 175 // locked in with the replica which triggered the increase (r2 this time). 176 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 177 t.Fatal(err) 178 } 179 180 // No increase is possible for the next 20 seconds. 181 182 // r2 @ 90s, 0s lag 183 tf.ratesHistory.add(sinceZero(80*time.Second), 200) 184 tf.ratesHistory.add(sinceZero(89*time.Second), 200) 185 tf.process(lagRecord(sinceZero(90*time.Second), r2, 0)) 186 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 187 t.Fatal(err) 188 } 189 190 // r1 @ 100s, 0s lag 191 tf.ratesHistory.add(sinceZero(99*time.Second), 200) 192 tf.process(lagRecord(sinceZero(100*time.Second), r1, 0)) 193 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 194 t.Fatal(err) 195 } 196 197 // Next rate increase is possible after testing the rate for 40s. 198 199 // r2 @ 110s, 0s lag 200 tf.ratesHistory.add(sinceZero(109*time.Second), 200) 201 tf.process(lagRecord(sinceZero(110*time.Second), r2, 0)) 202 if err := tf.checkState(stateIncreaseRate, 400, sinceZero(110*time.Second)); err != nil { 203 t.Fatal(err) 204 } 205 } 206 207 // TestMaxReplicationLagModule_ReplicaUnderTest_LastErrorOrNotUp is 208 // almost identical to TestMaxReplicationLagModule_Increase but this time we 209 // test that the system makes progress if the currently tracked replica has 210 // LastError set or is no longer tracked. 211 func TestMaxReplicationLagModule_ReplicaUnderTest_LastErrorOrNotUp(t *testing.T) { 212 tf, err := newTestFixtureWithMaxReplicationLag(5) 213 if err != nil { 214 t.Fatal(err) 215 } 216 217 // r2 @ 70s, 0s lag 218 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 219 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 220 // Rate was increased to 200 based on actual rate of 100 within [0s, 69s]. 221 // r2 becomes the "replica under test". 222 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 223 t.Fatal(err) 224 } 225 226 // r2 @ 75s, 0s lag, LastError set 227 rError := lagRecord(sinceZero(75*time.Second), r2, 0) 228 rError.LastError = errors.New("HealthCheck reporting broken") 229 tf.m.replicaLagCache.add(rError) 230 231 // r1 @ 110s, 0s lag 232 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 233 tf.ratesHistory.add(sinceZero(109*time.Second), 200) 234 tf.process(lagRecord(sinceZero(110*time.Second), r1, 0)) 235 // We ignore r2 as "replica under test" because it has LastError set. 236 // Instead, we act on r1. 237 // r1 becomes the "replica under test". 238 if err := tf.checkState(stateIncreaseRate, 400, sinceZero(110*time.Second)); err != nil { 239 t.Fatal(err) 240 } 241 242 // We'll simulate a shutdown of r1 i.e. we're no longer tracking it. 243 // r1 @ 115s, 0s lag, !Up 244 tf.ratesHistory.add(sinceZero(110*time.Second), 200) 245 tf.ratesHistory.add(sinceZero(114*time.Second), 400) 246 rNotUp := lagRecord(sinceZero(115*time.Second), r1, 0) 247 rNotUp.Serving = false 248 tf.m.replicaLagCache.add(rNotUp) 249 250 // r2 @ 150s, 0s lag (lastError no longer set) 251 tf.ratesHistory.add(sinceZero(149*time.Second), 400) 252 tf.process(lagRecord(sinceZero(150*time.Second), r2, 0)) 253 // We ignore r1 as "replica under test" because it has !Up set. 254 // Instead, we act on r2. 255 // r2 becomes the "replica under test". 256 if err := tf.checkState(stateIncreaseRate, 800, sinceZero(150*time.Second)); err != nil { 257 t.Fatal(err) 258 } 259 } 260 261 // TestMaxReplicationLagModule_ReplicaUnderTest_Timeout tests the safe guard 262 // that a "replica under test" which didn't report its lag for a while will be 263 // cleared such that any other replica can become the new "replica under test". 264 func TestMaxReplicationLagModule_ReplicaUnderTest_Timeout(t *testing.T) { 265 tf, err := newTestFixtureWithMaxReplicationLag(5) 266 if err != nil { 267 t.Fatal(err) 268 } 269 270 // r2 @ 70s, 0s lag 271 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 272 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 273 // Rate was increased to 200 based on actual rate of 100 within [0s, 69s]. 274 // r2 becomes the "replica under test". 275 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 276 t.Fatal(err) 277 } 278 279 // r1 @ 80s, 0s lag (ignored because r2 is the "replica under test") 280 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 281 tf.ratesHistory.add(sinceZero(79*time.Second), 200) 282 tf.process(lagRecord(sinceZero(80*time.Second), r1, 0)) 283 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 284 t.Fatal(err) 285 } 286 287 // r2 as "replica under test" did not report its lag for too long. 288 // We'll ignore it from now and let other replicas trigger rate changes. 289 // r1 @ 173s, 0s lag 290 // time for r1 must be > 172s (70s + 40s + 62s) which is 291 // (last rate change + test duration + max duration between increases). 292 tf.ratesHistory.add(sinceZero(172*time.Second), 200) 293 tf.process(lagRecord(sinceZero(173*time.Second), r1, 0)) 294 if err := tf.checkState(stateIncreaseRate, 400, sinceZero(173*time.Second)); err != nil { 295 t.Fatal(err) 296 } 297 } 298 299 // TestMaxReplicationLagModule_ReplicaUnderTest_IncreaseToDecrease verifies that 300 // the current "replica under test" is ignored when our state changes from 301 // "stateIncreaseRate" to "stateDecreaseAndGuessRate". 302 func TestMaxReplicationLagModule_ReplicaUnderTest_IncreaseToDecrease(t *testing.T) { 303 tf, err := newTestFixtureWithMaxReplicationLag(5) 304 if err != nil { 305 t.Fatal(err) 306 } 307 308 // r2 @ 70s, 0s lag (triggers the increase state) 309 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 310 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 311 // Rate was increased to 200 based on actual rate of 100 within [0s, 69s]. 312 // r2 becomes the "replica under test". 313 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 314 t.Fatal(err) 315 } 316 317 // r1 @ 80s, 0s lag 318 // This lag record is required in the next step to correctly calculate how 319 // much r1 lags behind due to the rate increase. 320 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 321 tf.ratesHistory.add(sinceZero(79*time.Second), 200) 322 tf.process(lagRecord(sinceZero(80*time.Second), r1, 0)) 323 // r1 remains the "replica under test". 324 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 325 t.Fatal(err) 326 } 327 328 // r2 @ 90s, 0s lag (ignored because the test duration is not up yet) 329 tf.ratesHistory.add(sinceZero(89*time.Second), 200) 330 tf.process(lagRecord(sinceZero(90*time.Second), r2, 0)) 331 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 332 t.Fatal(err) 333 } 334 335 // r1 @ 100s, 3s lag (above target, provokes a decrease) 336 tf.ratesHistory.add(sinceZero(99*time.Second), 200) 337 tf.process(lagRecord(sinceZero(100*time.Second), r1, 3)) 338 // r1 becomes the "replica under test". 339 // r1's high lag triggered the decrease state and therefore we did not wait 340 // for the pending increase of "replica under test" r2. 341 if err := tf.checkState(stateDecreaseAndGuessRate, 140, sinceZero(100*time.Second)); err != nil { 342 t.Fatal(err) 343 } 344 345 // r2 lag records are ignored while r1 is the "replica under test". 346 // r2 @ 110s, 0s lag 347 tf.ratesHistory.add(sinceZero(100*time.Second), 200) 348 tf.ratesHistory.add(sinceZero(109*time.Second), 140) 349 tf.process(lagRecord(sinceZero(110*time.Second), r2, 0)) 350 if err := tf.checkState(stateDecreaseAndGuessRate, 140, sinceZero(100*time.Second)); err != nil { 351 t.Fatal(err) 352 } 353 354 // r1 leaves the "replica under test" as soon as the test duration is up 355 // or its lag improved to a better state. 356 // Here we simulate that its lag improved and the resulting state goes from 357 // "decrease" to "increase". 358 // r1 @ 119s, 0s lag (triggers the increase state) 359 tf.ratesHistory.add(sinceZero(118*time.Second), 140) 360 tf.process(lagRecord(sinceZero(119*time.Second), r1, 0)) 361 // Rate increases to 170, the middle of: [good, bad] = [140, 200]. 362 if err := tf.checkState(stateIncreaseRate, 170, sinceZero(119*time.Second)); err != nil { 363 t.Fatal(err) 364 } 365 } 366 367 // TestMaxReplicationLagModule_ReplicaUnderTest_DecreaseToEmergency verifies 368 // that the current "replica under test" is ignored when our state changes from 369 // "stateDecreaseAndGuessRate" to "stateEmergency". 370 func TestMaxReplicationLagModule_ReplicaUnderTest_DecreaseToEmergency(t *testing.T) { 371 tf, err := newTestFixtureWithMaxReplicationLag(5) 372 if err != nil { 373 t.Fatal(err) 374 } 375 376 // INCREASE 377 378 // r1 @ 20s, 0s lag 379 // This lag record is required in the next step to correctly calculate how 380 // much r1 lags behind due to the rate increase. 381 tf.ratesHistory.add(sinceZero(19*time.Second), 100) 382 tf.process(lagRecord(sinceZero(20*time.Second), r1, 0)) 383 if err := tf.checkState(stateIncreaseRate, 100, sinceZero(1*time.Second)); err != nil { 384 t.Fatal(err) 385 } 386 387 // DECREASE 388 389 // r1 @ 40s, 3s lag (above target, provokes a decrease) 390 tf.ratesHistory.add(sinceZero(39*time.Second), 100) 391 tf.process(lagRecord(sinceZero(40*time.Second), r1, 3)) 392 // r1 becomes the "replica under test". 393 if err := tf.checkState(stateDecreaseAndGuessRate, 70, sinceZero(40*time.Second)); err != nil { 394 t.Fatal(err) 395 } 396 397 // EMERGENCY 398 399 // r2 lag is even worse and triggers the "emergency" state. 400 // r2 @ 50s, 10s lag 401 tf.ratesHistory.add(sinceZero(40*time.Second), 100) 402 tf.ratesHistory.add(sinceZero(49*time.Second), 70) 403 tf.process(lagRecord(sinceZero(50*time.Second), r2, 10)) 404 // r1 overrides r2 as new "replica under test". 405 if err := tf.checkState(stateEmergency, 35, sinceZero(50*time.Second)); err != nil { 406 t.Fatal(err) 407 } 408 409 // r1 lag becomes worse than the r1 lag now. We don't care and keep r1 as 410 // "replica under test" for now. 411 // r1 @ 60s, 15s lag 412 tf.ratesHistory.add(sinceZero(50*time.Second), 70) 413 tf.ratesHistory.add(sinceZero(59*time.Second), 35) 414 tf.process(lagRecord(sinceZero(60*time.Second), r1, 15)) 415 if err := tf.checkState(stateEmergency, 35, sinceZero(50*time.Second)); err != nil { 416 t.Fatal(err) 417 } 418 419 // INCREASE 420 421 // r2 fully recovers and triggers an increase. 422 // r2 @ 70s, 0s lag 423 tf.ratesHistory.add(sinceZero(69*time.Second), 35) 424 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 425 // r2 becomes the new "replica under test". 426 if err := tf.checkState(stateIncreaseRate, 70, sinceZero(70*time.Second)); err != nil { 427 t.Fatal(err) 428 } 429 430 // EMERGENCY 431 432 // r1 hasn't recovered yet and forces us to go back to the "emergency" state. 433 // r1 @ 80s, 15s lag 434 tf.ratesHistory.add(sinceZero(70*time.Second), 35) 435 tf.ratesHistory.add(sinceZero(79*time.Second), 70) 436 tf.process(lagRecord(sinceZero(80*time.Second), r1, 15)) 437 // r1 becomes the new "replica under test". 438 if err := tf.checkState(stateEmergency, 35, sinceZero(80*time.Second)); err != nil { 439 t.Fatal(err) 440 } 441 } 442 443 // TestMaxReplicationLagModule_Increase_BadRateUpperBound verifies that a 444 // known bad rate is always the upper bound for any rate increase. 445 func TestMaxReplicationLagModule_Increase_BadRateUpperBound(t *testing.T) { 446 tf, err := newTestFixtureWithMaxReplicationLag(5) 447 if err != nil { 448 t.Fatal(err) 449 } 450 451 // We start at config.InitialRate. 452 if err := tf.checkState(stateIncreaseRate, 100, sinceZero(1*time.Second)); err != nil { 453 t.Fatal(err) 454 } 455 456 // Assume that a bad value of 150 was set @ 30s and log error 457 if err := tf.m.memory.markBad(150, sinceZero(30*time.Second)); err != nil { 458 log.Errorf("tf.m.memory.markBad(150, sinceZero(30*time.Second)) falied : %v", err) 459 } 460 461 // r2 @ 70s, 0s lag 462 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 463 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 464 // Rate should get increased to 200 based on actual rate of 100 within 465 // [0s, 69s]. 466 // However, this would go over the bad rate. Therefore, the new rate will be 467 // the middle of [100, 150] ([actual rate, bad rate]). 468 if err := tf.checkState(stateIncreaseRate, 125, sinceZero(70*time.Second)); err != nil { 469 t.Fatal(err) 470 } 471 } 472 473 // TestMaxReplicationLagModule_Increase_MinimumProgress verifies that the 474 // calculated new rate is never identical to the current rate and at least by 475 // "memoryGranularity" higher. 476 func TestMaxReplicationLagModule_Increase_MinimumProgress(t *testing.T) { 477 tf, err := newTestFixtureWithMaxReplicationLag(5) 478 if err != nil { 479 t.Fatal(err) 480 } 481 482 // We start at config.InitialRate. 483 if err := tf.checkState(stateIncreaseRate, 100, sinceZero(1*time.Second)); err != nil { 484 t.Fatal(err) 485 } 486 487 // Assume that a bad value of 105 was set @ 30s. 488 tf.m.memory.markBad(105, sinceZero(30*time.Second)) 489 490 // r2 @ 70s, 0s lag 491 // Assume that the actual rate was below the limit (95 instead of 100). 492 tf.ratesHistory.add(sinceZero(69*time.Second), 95) 493 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 494 // Rate should get increased to 190 based on an actual rate of 95 within 495 // [0s, 69s]. 496 // However, this would go over the bad rate. Therefore, the new rate should 497 // be the middle of [95, 105] ([actual rate, bad rate]). 498 // But then the new rate is identical to the old set rate of 100. 499 // In such a case, we always advance the new rate by "memoryGranularity" 500 // (which is currently 5). 501 if err := tf.checkState(stateIncreaseRate, 105, sinceZero(70*time.Second)); err != nil { 502 t.Fatal(err) 503 } 504 } 505 506 // TestMaxReplicationLagModule_Decrease verifies that we correctly calculate the 507 // replication rate in the decreaseAndGuessRate state. 508 func TestMaxReplicationLagModule_Decrease(t *testing.T) { 509 tf, err := newTestFixtureWithMaxReplicationLag(5) 510 if err != nil { 511 t.Fatal(err) 512 } 513 514 // r2 @ 70s, 0s lag 515 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 516 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 517 // Rate was increased to 200 based on actual rate of 100 within [0s, 69s]. 518 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 519 t.Fatal(err) 520 } 521 522 // r2 @ 90s, 3s lag (above target, provokes a decrease) 523 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 524 tf.ratesHistory.add(sinceZero(89*time.Second), 200) 525 tf.process(lagRecord(sinceZero(90*time.Second), r2, 3)) 526 // The guessed replication rate is 140 because of the 3s lag increase 527 // within the elapsed 20s. 528 // The replica processed only 17s worth of work (20s duration - 3s lag increase) 529 // 17s / 20s * 200 QPS actual rate => 170 QPS replica rate 530 // 531 // This results in a backlog of 3s * 200 QPS = 600 queries. 532 // Since this backlog is spread across SpreadBacklogAcrossSec (20s), 533 // the guessed rate gets further reduced by 30 QPS (600 queries / 20s). 534 // Hence, the rate is set to 140 QPS (170 - 30). 535 if err := tf.checkState(stateDecreaseAndGuessRate, 140, sinceZero(90*time.Second)); err != nil { 536 t.Fatal(err) 537 } 538 } 539 540 // TestMaxReplicationLagModule_Decrease_NoReplicaHistory skips decreasing the 541 // rate when the lag of r2 goes above the target value because we don't have 542 // replication lag value since the last rate change for r2. Therefore, we cannot 543 // reliably guess its rate and wait for the next available record. 544 func TestMaxReplicationLagModule_Decrease_NoReplicaHistory(t *testing.T) { 545 tf, err := newTestFixtureWithMaxReplicationLag(10) 546 if err != nil { 547 t.Fatal(err) 548 } 549 550 // r2 @ 70s, 0s lag 551 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 552 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 553 // Rate was increased to 200 based on actual rate of 100 within [0s, 69s]. 554 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 555 t.Fatal(err) 556 } 557 558 // r1 @ 80s, 3s lag (above target, but no decrease triggered) 559 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 560 tf.ratesHistory.add(sinceZero(79*time.Second), 200) 561 tf.process(lagRecord(sinceZero(80*time.Second), r1, 3)) 562 // Rate was decreased by 25% (half the emergency decrease) as safety measure. 563 if err := tf.checkState(stateDecreaseAndGuessRate, 150, sinceZero(80*time.Second)); err != nil { 564 t.Fatal(err) 565 } 566 567 // r2 @ 90s, 0s lag 568 tf.ratesHistory.add(sinceZero(80*time.Second), 200) 569 tf.ratesHistory.add(sinceZero(89*time.Second), 150) 570 tf.process(lagRecord(sinceZero(90*time.Second), r2, 0)) 571 // r2 is ignored because r1 is the "replica under test". 572 if err := tf.checkState(stateDecreaseAndGuessRate, 150, sinceZero(80*time.Second)); err != nil { 573 t.Fatal(err) 574 } 575 576 // r1 recovers after the rate decrease and triggers a new increase. 577 // r1 @ 100s, 0s lag 578 tf.ratesHistory.add(sinceZero(99*time.Second), 150) 579 tf.process(lagRecord(sinceZero(100*time.Second), r1, 0)) 580 if err := tf.checkState(stateIncreaseRate, 300, sinceZero(100*time.Second)); err != nil { 581 t.Fatal(err) 582 } 583 } 584 585 func TestMaxReplicationLagModule_IgnoreNSlowestReplicas_REPLICA(t *testing.T) { 586 testIgnoreNSlowestReplicas(t, r1, r2) 587 } 588 589 func TestMaxReplicationLagModule_IgnoreNSlowestReplicas_RDONLY(t *testing.T) { 590 testIgnoreNSlowestReplicas(t, rdonly1, rdonly2) 591 } 592 593 func testIgnoreNSlowestReplicas(t *testing.T, r1UID, r2UID uint32) { 594 config := NewMaxReplicationLagModuleConfig(5) 595 typ := "REPLICA" 596 if r1UID == r1 { 597 config.IgnoreNSlowestReplicas = 1 598 } else { 599 config.IgnoreNSlowestRdonlys = 1 600 typ = "RDONLY" 601 } 602 tf, err := newTestFixture(config) 603 if err != nil { 604 t.Fatal(err) 605 } 606 607 // r1 @ 80s, 0s lag 608 tf.ratesHistory.add(sinceZero(79*time.Second), 100) 609 tf.process(lagRecord(sinceZero(80*time.Second), r1UID, 0)) 610 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(80*time.Second)); err != nil { 611 t.Fatal(err) 612 } 613 614 // r2 @ 90s, 10s lag 615 tf.ratesHistory.add(sinceZero(80*time.Second), 100) 616 tf.ratesHistory.add(sinceZero(90*time.Second), 200) 617 tf.process(lagRecord(sinceZero(90*time.Second), r2UID, 10)) 618 // Although r2's lag is high, it's ignored because it's the 1 slowest replica. 619 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(80*time.Second)); err != nil { 620 t.Fatal(err) 621 } 622 results := tf.m.results.latestValues() 623 if got, want := len(results), 2; got != want { 624 t.Fatalf("skipped replica should have been recorded on the results page. got = %v, want = %v", got, want) 625 } 626 if got, want := results[0].Reason, fmt.Sprintf("skipping this replica because it's among the 1 slowest %v tablets", typ); got != want { 627 t.Fatalf("skipped replica should have been recorded as skipped on the results page. got = %v, want = %v", got, want) 628 } 629 630 // r1 @ 100s, 20s lag 631 tf.ratesHistory.add(sinceZero(99*time.Second), 200) 632 tf.process(lagRecord(sinceZero(100*time.Second), r1UID, 20)) 633 // r1 would become the new 1 slowest replica. However, we do not ignore it 634 // because then we would ignore all known replicas in a row. 635 // => react to the high lag and reduce the rate by 50% from 200 to 100. 636 if err := tf.checkState(stateEmergency, 100, sinceZero(100*time.Second)); err != nil { 637 t.Fatal(err) 638 } 639 } 640 641 func TestMaxReplicationLagModule_IgnoreNSlowestReplicas_NotEnoughReplicas(t *testing.T) { 642 config := NewMaxReplicationLagModuleConfig(5) 643 config.IgnoreNSlowestReplicas = 1 644 tf, err := newTestFixture(config) 645 if err != nil { 646 t.Fatal(err) 647 } 648 649 // r2 @ 70s, 10s lag 650 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 651 tf.process(lagRecord(sinceZero(70*time.Second), r2, 10)) 652 // r2 is the 1 slowest replica. However, it's not ignored because then we 653 // would ignore all replicas. Therefore, we react to its lag increase. 654 if err := tf.checkState(stateEmergency, 50, sinceZero(70*time.Second)); err != nil { 655 t.Fatal(err) 656 } 657 } 658 659 // TestMaxReplicationLagModule_IgnoreNSlowestReplicas_IsIgnoredDuringIncrease 660 // is almost identical to 661 // TestMaxReplicationLagModule_ReplicaUnderTest_LastErrorOrNotUp. 662 // r2 triggers an increase and we wait for its next update after the increase 663 // waiting period. However, it becomes the slowest replica in the meantime and 664 // will be completely ignored. In consequence, we no longer wait for r2 and r1 665 // can trigger another increase instead. 666 func TestMaxReplicationLagModule_IgnoreNSlowestReplicas_IsIgnoredDuringIncrease(t *testing.T) { 667 config := NewMaxReplicationLagModuleConfig(5) 668 config.IgnoreNSlowestReplicas = 1 669 tf, err := newTestFixture(config) 670 if err != nil { 671 t.Fatal(err) 672 } 673 674 // r2 @ 70s, 0s lag 675 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 676 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 677 // Rate was increased to 200 based on actual rate of 100 within [0s, 69s]. 678 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 679 t.Fatal(err) 680 } 681 682 // r1 @ 80s, 0s lag 683 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 684 tf.ratesHistory.add(sinceZero(79*time.Second), 200) 685 tf.process(lagRecord(sinceZero(80*time.Second), r1, 0)) 686 // Lag record was ignored because it's within the wait period. 687 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 688 t.Fatal(err) 689 } 690 691 // r2 becomes slow and will be ignored now. 692 // r2 @ 90s, 10s lag 693 tf.ratesHistory.add(sinceZero(89*time.Second), 200) 694 tf.m.replicaLagCache.add(lagRecord(sinceZero(90*time.Second), r2, 10)) 695 // We ignore the 1 slowest replica and do not decrease despite r2's high lag. 696 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 697 t.Fatal(err) 698 } 699 700 // r1 @ 110s, 0s lag 701 tf.ratesHistory.add(sinceZero(109*time.Second), 200) 702 tf.process(lagRecord(sinceZero(110*time.Second), r1, 0)) 703 // Meanwhile, r1 is doing fine and will trigger the next increase because 704 // we're no longer waiting for the ignored r2. 705 if err := tf.checkState(stateIncreaseRate, 400, sinceZero(110*time.Second)); err != nil { 706 t.Fatal(err) 707 } 708 } 709 710 // TestMaxReplicationLagModule_IgnoreNSlowestReplicas_IncludeRdonly is the same 711 // as TestMaxReplicationLagModule_IgnoreNSlowestReplicas but includes 712 // RDONLY tablets as well. 713 func TestMaxReplicationLagModule_IgnoreNSlowestReplicas_IncludeRdonly(t *testing.T) { 714 config := NewMaxReplicationLagModuleConfig(5) 715 // We ignore up to 1 REPLICA and 1 RDONLY tablet. 716 config.IgnoreNSlowestReplicas = 1 717 config.IgnoreNSlowestRdonlys = 1 718 tf, err := newTestFixture(config) 719 if err != nil { 720 t.Fatal(err) 721 } 722 723 // r1 @ 80s, 0s lag 724 tf.ratesHistory.add(sinceZero(79*time.Second), 100) 725 tf.process(lagRecord(sinceZero(80*time.Second), r1, 0)) 726 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(80*time.Second)); err != nil { 727 t.Fatal(err) 728 } 729 730 // rdonly1 @ 85s, 0s lag 731 tf.ratesHistory.add(sinceZero(80*time.Second), 100) 732 tf.ratesHistory.add(sinceZero(84*time.Second), 200) 733 tf.process(lagRecord(sinceZero(85*time.Second), rdonly1, 0)) 734 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(80*time.Second)); err != nil { 735 t.Fatal(err) 736 } 737 738 // r2 @ 90s, 10s lag 739 tf.ratesHistory.add(sinceZero(89*time.Second), 200) 740 tf.process(lagRecord(sinceZero(90*time.Second), r2, 10)) 741 // Although r2's lag is high, it's ignored because it's the 1 slowest REPLICA tablet. 742 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(80*time.Second)); err != nil { 743 t.Fatal(err) 744 } 745 results := tf.m.results.latestValues() 746 if got, want := len(results), 3; got != want { 747 t.Fatalf("skipped replica should have been recorded on the results page. got = %v, want = %v", got, want) 748 } 749 if got, want := results[0].Reason, "skipping this replica because it's among the 1 slowest REPLICA tablets"; got != want { 750 t.Fatalf("skipped replica should have been recorded as skipped on the results page. got = %v, want = %v", got, want) 751 } 752 753 // rdonly2 @ 95s, 10s lag 754 tf.ratesHistory.add(sinceZero(94*time.Second), 200) 755 tf.process(lagRecord(sinceZero(95*time.Second), rdonly2, 10)) 756 // Although rdonly2's lag is high, it's ignored because it's the 1 slowest RDONLY tablet. 757 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(80*time.Second)); err != nil { 758 t.Fatal(err) 759 } 760 results = tf.m.results.latestValues() 761 if got, want := len(results), 4; got != want { 762 t.Fatalf("skipped replica should have been recorded on the results page. got = %v, want = %v", got, want) 763 } 764 if got, want := results[0].Reason, "skipping this replica because it's among the 1 slowest RDONLY tablets"; got != want { 765 t.Fatalf("skipped replica should have been recorded as skipped on the results page. got = %v, want = %v", got, want) 766 } 767 768 // r1 @ 100s, 11s lag 769 tf.ratesHistory.add(sinceZero(99*time.Second), 200) 770 tf.process(lagRecord(sinceZero(100*time.Second), r1, 10)) 771 // r1 would become the new 1 slowest REPLICA tablet. However, we do not ignore 772 // it because then we would ignore all known replicas in a row. 773 // => react to the high lag and reduce the rate by 50% from 200 to 100. 774 if err := tf.checkState(stateEmergency, 100, sinceZero(100*time.Second)); err != nil { 775 t.Fatal(err) 776 } 777 778 // r2 and rdonly are omitted here for brevity. 779 780 // r1 recovers, but then rdonly1 becomes slow as well. 781 782 // r1 @ 120s, 0s lag 783 tf.ratesHistory.add(sinceZero(119*time.Second), 100) 784 tf.process(lagRecord(sinceZero(120*time.Second), r1, 0)) 785 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(120*time.Second)); err != nil { 786 t.Fatal(err) 787 } 788 789 // rdonly1 @ 125s, 11s lag 790 tf.ratesHistory.add(sinceZero(120*time.Second), 100) 791 tf.ratesHistory.add(sinceZero(124*time.Second), 200) 792 tf.process(lagRecord(sinceZero(125*time.Second), rdonly1, 11)) 793 // rdonly1 would become the new 1 slowest RDONLY tablet. However, we do not 794 // ignore it because then we would ignore all known replicas in a row. 795 // => react to the high lag and reduce the rate by 50% from 200 to 100. 796 if err := tf.checkState(stateEmergency, 100, sinceZero(125*time.Second)); err != nil { 797 t.Fatal(err) 798 } 799 } 800 801 // TestMaxReplicationLagModule_EmergencyDoesNotChangeBadValues verifies that a 802 // replica, which triggers the emergency state and drags the rate all the way 803 // down to the minimum, does not influence the stored bad rate in the "memory". 804 // This situation occurs when a tablet gets restarted and restores from a 805 // backup. During that time, the reported replication lag is very high and won't 806 // go down until the restore finished (which may take hours). Once the restore 807 // is done, the throttler should immediately continue from the last good rate 808 // before the emergency and not have to test rates starting from the minimum. 809 // In particular, the stored bad rate must not be close the minimum rate or it 810 // may take hours until e.g. a bad rate of 2 ages out to the actual bad rate of 811 // e.g. 201. 812 func TestMaxReplicationLagModule_EmergencyDoesNotChangeBadValues(t *testing.T) { 813 config := NewMaxReplicationLagModuleConfig(5) 814 // Use a very aggressive aging rate to verify that bad rates do not age while 815 // we're in the "emergency" state. 816 config.AgeBadRateAfterSec = 21 817 tf, err := newTestFixture(config) 818 if err != nil { 819 t.Fatal(err) 820 } 821 822 // INCREASE (necessary to set a "good" rate in the memory) 823 824 // r2 @ 70s, 0s lag 825 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 826 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 827 if err := tf.checkState(stateIncreaseRate, 200, sinceZero(70*time.Second)); err != nil { 828 t.Fatal(err) 829 } 830 831 // r2 @ 110s, 0s lag 832 tf.ratesHistory.add(sinceZero(70*time.Second), 100) 833 tf.ratesHistory.add(sinceZero(109*time.Second), 200) 834 tf.process(lagRecord(sinceZero(110*time.Second), r2, 0)) 835 if err := tf.checkState(stateIncreaseRate, 400, sinceZero(110*time.Second)); err != nil { 836 t.Fatal(err) 837 } 838 if got, want := tf.m.memory.highestGood(), int64(200); got != want { 839 t.Fatalf("wrong good rate: got = %v, want = %v", got, want) 840 } 841 842 // DECREASE (necessary to set a "bad" rate in the memory) 843 844 // r2 @ 130s, 3s lag (above target, provokes a decrease) 845 tf.ratesHistory.add(sinceZero(110*time.Second), 200) 846 tf.ratesHistory.add(sinceZero(129*time.Second), 400) 847 tf.process(lagRecord(sinceZero(130*time.Second), r2, 3)) 848 if err := tf.checkState(stateDecreaseAndGuessRate, 280, sinceZero(130*time.Second)); err != nil { 849 t.Fatal(err) 850 } 851 if got, want := tf.m.memory.lowestBad(), int64(400); got != want { 852 t.Fatalf("wrong bad rate: got = %v, want = %v", got, want) 853 } 854 855 // memory: [good, bad] now is [200, 400]. 856 857 // EMERGENCY 858 859 // Assume that "r1" was just restored and has a very high replication lag. 860 // r1 @ 140s, 3600s lag 861 tf.ratesHistory.add(sinceZero(130*time.Second), 400) 862 tf.ratesHistory.add(sinceZero(139*time.Second), 280) 863 tf.process(lagRecord(sinceZero(140*time.Second), r1, 3600)) 864 if err := tf.checkState(stateEmergency, 140, sinceZero(140*time.Second)); err != nil { 865 t.Fatal(err) 866 } 867 if got, want := tf.m.memory.lowestBad(), int64(280); got != want { 868 t.Fatalf("bad rate should change when we transition to the emergency state: got = %v, want = %v", got, want) 869 } 870 871 // memory: [good, bad] now is [200, 280]. 872 873 // r2 @ 150s, 0s lag (ignored because r1 is the "replica under test") 874 tf.ratesHistory.add(sinceZero(140*time.Second), 280) 875 tf.ratesHistory.add(sinceZero(149*time.Second), 140) 876 tf.process(lagRecord(sinceZero(150*time.Second), r2, 0)) 877 if err := tf.checkState(stateEmergency, 140, sinceZero(140*time.Second)); err != nil { 878 t.Fatal(err) 879 } 880 tf.ratesHistory.add(sinceZero(160*time.Second), 140) 881 882 // r1 keeps to drive the throttler rate down, but not the bad rate. 883 884 times := []int{160, 180, 200, 220, 240, 260, 280, 300, 320} 885 rates := []int{70, 35, 17, 8, 4, 2, 1, 1, 1} 886 887 for i, tm := range times { 888 // r1 @ <tt>s, 3600s lag 889 r1Time := sinceZero(time.Duration(tm) * time.Second) 890 if i > 0 { 891 tf.ratesHistory.add(r1Time, int64(rates[i-1])) 892 } 893 tf.process(lagRecord(r1Time, r1, 3600)) 894 if err := tf.checkState(stateEmergency, int64(rates[i]), r1Time); err != nil { 895 t.Fatalf("time=%d: %v", tm, err) 896 } 897 if got, want := tf.m.memory.lowestBad(), int64(280); got != want { 898 t.Fatalf("time=%d: bad rate must not change when the old state is the emergency state: got = %v, want = %v", tm, got, want) 899 } 900 901 // r2 @ <tt+10>s, 0s lag (ignored because r1 is the "replica under test") 902 r2Time := sinceZero(time.Duration(tm+10) * time.Second) 903 tf.ratesHistory.add(r2Time, int64(rates[i])) 904 tf.process(lagRecord(r2Time, r2, 0)) 905 if err := tf.checkState(stateEmergency, int64(rates[i]), r1Time); err != nil { 906 t.Fatalf("time=%d: %v", tm, err) 907 } 908 } 909 910 // INCREASE 911 912 // r1 is fully restored now and its lag is zero. 913 // We'll leave the emergency state and increase the rate based of the last 914 // stored "good" rate. 915 916 // r1 @ 340s, 0s lag 917 tf.ratesHistory.add(sinceZero(339*time.Second), 1) 918 tf.process(lagRecord(sinceZero(340*time.Second), r1, 0)) 919 // New rate is 240, the middle of [good, bad] = [200, 240]. 920 if err := tf.checkState(stateIncreaseRate, 240, sinceZero(340*time.Second)); err != nil { 921 t.Fatal(err) 922 } 923 } 924 925 func TestMaxReplicationLagModule_NoIncreaseIfMaxRateWasNotApproached(t *testing.T) { 926 config := NewMaxReplicationLagModuleConfig(5) 927 tf, err := newTestFixture(config) 928 if err != nil { 929 t.Fatal(err) 930 } 931 932 // r1 @ 20s, 0s lag 933 // This lag record is required in the next step to correctly calculate how 934 // much r1 lags behind due to the rate increase. 935 tf.process(lagRecord(sinceZero(20*time.Second), r1, 0)) 936 if err := tf.checkState(stateIncreaseRate, 100, sinceZero(1*time.Second)); err != nil { 937 t.Fatal(err) 938 } 939 940 // Master gets 10 QPS in second 69. 941 // r1 @ 70s, 0s lag. 942 // Rate does not double to 200 as it does in the other tests because 943 // the actual rate is much smaller than the current rate of 100. 944 tf.ratesHistory.add(sinceZero(69*time.Second), 10) 945 tf.process(lagRecord(sinceZero(70*time.Second), r1, 0)) 946 // r1 becomes the "replica under test". 947 if err := tf.checkState(stateIncreaseRate, 100, sinceZero(1*time.Second)); err != nil { 948 t.Fatal(err) 949 } 950 } 951 952 // lagRecord creates a fake record using a fake LegacyTabletStats object. 953 func lagRecord(t time.Time, uid, lag uint32) replicationLagRecord { 954 return replicationLagRecord{t, tabletStats(uid, lag)} 955 } 956 957 // tabletStats creates fake tablet health data. 958 func tabletStats(uid, lag uint32) discovery.TabletHealth { 959 typ := topodatapb.TabletType_REPLICA 960 if uid == rdonly1 || uid == rdonly2 { 961 typ = topodatapb.TabletType_RDONLY 962 } 963 tablet := &topodatapb.Tablet{ 964 Alias: &topodatapb.TabletAlias{Cell: "cell1", Uid: uid}, 965 Keyspace: "ks1", 966 Shard: "-80", 967 Type: typ, 968 PortMap: map[string]int32{"vt": int32(uid)}, 969 } 970 return discovery.TabletHealth{ 971 Tablet: tablet, 972 Target: &querypb.Target{ 973 Keyspace: "ks1", 974 Shard: "-80", 975 TabletType: typ, 976 }, 977 Serving: true, 978 Stats: &querypb.RealtimeStats{ 979 ReplicationLagSeconds: lag, 980 }, 981 PrimaryTermStartTime: 22, 982 LastError: nil, 983 } 984 } 985 986 func TestApplyLatestConfig(t *testing.T) { 987 config := NewMaxReplicationLagModuleConfig(5) 988 tf, err := newTestFixture(config) 989 if err != nil { 990 t.Fatal(err) 991 } 992 993 // We start at config.InitialRate. 994 if err := tf.checkState(stateIncreaseRate, 100, sinceZero(1*time.Second)); err != nil { 995 t.Fatal(err) 996 } 997 // Change the default MaxIncrease from 100% to 200% and test that it's 998 // correctly propagated. 999 config.MaxIncrease = 2 1000 tf.m.updateConfiguration(config.Configuration, true /* copyZeroValues */) 1001 1002 // r2 @ 70s, 0s lag 1003 tf.ratesHistory.add(sinceZero(69*time.Second), 100) 1004 tf.process(lagRecord(sinceZero(70*time.Second), r2, 0)) 1005 // Rate was increased to 300 based on an actual rate of 100 within [0s, 69s]. 1006 // That's a 200% increase. 1007 if err := tf.checkState(stateIncreaseRate, 300, sinceZero(70*time.Second)); err != nil { 1008 t.Fatal(err) 1009 } 1010 1011 // Now reset the config to its default values. 1012 tf.m.resetConfiguration() 1013 // Let's assume that the current rate of 300 was actually fine. 1014 // After the reset, we'll increase it only by 100% to 600. 1015 1016 // r2 @ 110s, 0s lag 1017 tf.ratesHistory.add(sinceZero(80*time.Second), 300) 1018 tf.ratesHistory.add(sinceZero(109*time.Second), 300) 1019 tf.process(lagRecord(sinceZero(110*time.Second), r2, 0)) 1020 if err := tf.checkState(stateIncreaseRate, 600, sinceZero(110*time.Second)); err != nil { 1021 t.Fatal(err) 1022 } 1023 }