vitess.io/vitess@v0.16.2/go/test/endtoend/tabletmanager/throttler/throttler_test.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 package throttler 17 18 import ( 19 "context" 20 "flag" 21 "fmt" 22 "io" 23 "net/http" 24 "os" 25 "testing" 26 "time" 27 28 "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base" 29 30 "vitess.io/vitess/go/test/endtoend/cluster" 31 32 "github.com/stretchr/testify/assert" 33 "github.com/stretchr/testify/require" 34 ) 35 36 var ( 37 clusterInstance *cluster.LocalProcessCluster 38 primaryTablet *cluster.Vttablet 39 replicaTablet *cluster.Vttablet 40 hostname = "localhost" 41 keyspaceName = "ks" 42 cell = "zone1" 43 sqlSchema = ` 44 create table t1( 45 id bigint, 46 value varchar(16), 47 primary key(id) 48 ) Engine=InnoDB; 49 ` 50 51 vSchema = ` 52 { 53 "sharded": true, 54 "vindexes": { 55 "hash": { 56 "type": "hash" 57 } 58 }, 59 "tables": { 60 "t1": { 61 "column_vindexes": [ 62 { 63 "column": "id", 64 "name": "hash" 65 } 66 ] 67 } 68 } 69 }` 70 71 httpClient = base.SetupHTTPClient(time.Second) 72 throttledAppsAPIPath = "throttler/throttled-apps" 73 checkAPIPath = "throttler/check" 74 checkSelfAPIPath = "throttler/check-self" 75 ) 76 77 const ( 78 throttlerThreshold = 1 * time.Second // standard, tight threshold 79 onDemandHeartbeatDuration = 5 * time.Second 80 applyConfigWait = 15 * time.Second // time after which we're sure the throttler has refreshed config and tablets 81 ) 82 83 func TestMain(m *testing.M) { 84 defer cluster.PanicHandler(nil) 85 flag.Parse() 86 87 exitCode := func() int { 88 clusterInstance = cluster.NewCluster(cell, hostname) 89 defer clusterInstance.Teardown() 90 91 // Start topo server 92 err := clusterInstance.StartTopo() 93 if err != nil { 94 return 1 95 } 96 97 // Set extra tablet args for lock timeout 98 clusterInstance.VtTabletExtraArgs = []string{ 99 "--lock_tables_timeout", "5s", 100 "--watch_replication_stream", 101 "--enable_replication_reporter", 102 "--enable-lag-throttler", 103 "--throttle_threshold", throttlerThreshold.String(), 104 "--heartbeat_enable", 105 "--heartbeat_interval", "250ms", 106 "--heartbeat_on_demand_duration", onDemandHeartbeatDuration.String(), 107 "--disable_active_reparents", 108 } 109 110 // Start keyspace 111 keyspace := &cluster.Keyspace{ 112 Name: keyspaceName, 113 SchemaSQL: sqlSchema, 114 VSchema: vSchema, 115 } 116 117 if err = clusterInstance.StartUnshardedKeyspace(*keyspace, 1, false); err != nil { 118 return 1 119 } 120 121 // Collect table paths and ports 122 tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets 123 for _, tablet := range tablets { 124 if tablet.Type == "primary" { 125 primaryTablet = tablet 126 } else if tablet.Type != "rdonly" { 127 replicaTablet = tablet 128 } 129 } 130 131 return m.Run() 132 }() 133 os.Exit(exitCode) 134 } 135 136 func throttledApps(tablet *cluster.Vttablet) (resp *http.Response, respBody string, err error) { 137 resp, err = httpClient.Get(fmt.Sprintf("http://localhost:%d/%s", tablet.HTTPPort, throttledAppsAPIPath)) 138 if err != nil { 139 return resp, respBody, err 140 } 141 b, err := io.ReadAll(resp.Body) 142 if err != nil { 143 return resp, respBody, err 144 } 145 respBody = string(b) 146 return resp, respBody, err 147 } 148 149 func throttleCheck(tablet *cluster.Vttablet, skipRequestHeartbeats bool) (*http.Response, error) { 150 return httpClient.Get(fmt.Sprintf("http://localhost:%d/%s?s=%t", tablet.HTTPPort, checkAPIPath, skipRequestHeartbeats)) 151 } 152 153 func throttleCheckSelf(tablet *cluster.Vttablet) (*http.Response, error) { 154 return httpClient.Head(fmt.Sprintf("http://localhost:%d/%s", tablet.HTTPPort, checkSelfAPIPath)) 155 } 156 157 func warmUpHeartbeat(t *testing.T) (respStatus int) { 158 // because we run with -heartbeat_on_demand_duration=5s, the heartbeat is "cold" right now. 159 // Let's warm it up. 160 resp, err := throttleCheck(primaryTablet, false) 161 require.NoError(t, err) 162 defer resp.Body.Close() 163 time.Sleep(time.Second) 164 return resp.StatusCode 165 } 166 167 // waitForThrottleCheckStatus waits for the tablet to return the provided HTTP code in a throttle check 168 func waitForThrottleCheckStatus(t *testing.T, tablet *cluster.Vttablet, wantCode int) { 169 _ = warmUpHeartbeat(t) 170 ctx, cancel := context.WithTimeout(context.Background(), onDemandHeartbeatDuration+applyConfigWait) 171 defer cancel() 172 173 for { 174 resp, err := throttleCheck(tablet, true) 175 require.NoError(t, err) 176 177 if wantCode == resp.StatusCode { 178 // Wait for any cached check values to be cleared and the new 179 // status value to be in effect everywhere before returning. 180 resp.Body.Close() 181 return 182 } 183 select { 184 case <-ctx.Done(): 185 b, err := io.ReadAll(resp.Body) 186 require.NoError(t, err) 187 resp.Body.Close() 188 189 assert.Equal(t, wantCode, resp.StatusCode, "body: %v", string(b)) 190 return 191 default: 192 resp.Body.Close() 193 time.Sleep(time.Second) 194 } 195 } 196 } 197 198 func TestThrottlerAfterMetricsCollected(t *testing.T) { 199 defer cluster.PanicHandler(t) 200 201 // We run with on-demand heartbeats. Immediately as the tablet manager opens, it sends a one-time 202 // request for heartbeats, which means the throttler is able to collect initial "good" data. 203 // After a few seconds, the heartbeat lease terminates. We wait for that. 204 // {"StatusCode":429,"Value":4.864921,"Threshold":1,"Message":"Threshold exceeded"} 205 t.Run("expect push back once initial heartbeat lease terminates", func(t *testing.T) { 206 time.Sleep(onDemandHeartbeatDuration) 207 waitForThrottleCheckStatus(t, primaryTablet, http.StatusTooManyRequests) 208 }) 209 t.Run("requesting heartbeats", func(t *testing.T) { 210 respStatus := warmUpHeartbeat(t) 211 assert.NotEqual(t, http.StatusOK, respStatus) 212 }) 213 t.Run("expect OK once heartbeats lease renewed", func(t *testing.T) { 214 time.Sleep(1 * time.Second) 215 resp, err := throttleCheck(primaryTablet, false) 216 require.NoError(t, err) 217 defer resp.Body.Close() 218 assert.Equal(t, http.StatusOK, resp.StatusCode) 219 }) 220 t.Run("expect OK once heartbeats lease renewed, still", func(t *testing.T) { 221 time.Sleep(1 * time.Second) 222 resp, err := throttleCheck(primaryTablet, false) 223 require.NoError(t, err) 224 defer resp.Body.Close() 225 assert.Equal(t, http.StatusOK, resp.StatusCode) 226 }) 227 t.Run("validate throttled-apps", func(t *testing.T) { 228 resp, body, err := throttledApps(primaryTablet) 229 require.NoError(t, err) 230 defer resp.Body.Close() 231 assert.Equal(t, http.StatusOK, resp.StatusCode) 232 assert.Contains(t, body, "always-throttled-app") 233 }) 234 t.Run("validate check-self", func(t *testing.T) { 235 resp, err := throttleCheckSelf(primaryTablet) 236 require.NoError(t, err) 237 defer resp.Body.Close() 238 assert.Equal(t, http.StatusOK, resp.StatusCode) 239 }) 240 t.Run("validate check-self, again", func(t *testing.T) { 241 resp, err := throttleCheckSelf(replicaTablet) 242 require.NoError(t, err) 243 defer resp.Body.Close() 244 assert.Equal(t, http.StatusOK, resp.StatusCode) 245 }) 246 } 247 248 func TestLag(t *testing.T) { 249 defer cluster.PanicHandler(t) 250 // Stop VTOrc because we want to stop replication to increase lag. 251 // We don't want VTOrc to fix this. 252 clusterInstance.DisableVTOrcRecoveries(t) 253 defer clusterInstance.EnableVTOrcRecoveries(t) 254 255 t.Run("stopping replication", func(t *testing.T) { 256 err := clusterInstance.VtctlclientProcess.ExecuteCommand("StopReplication", replicaTablet.Alias) 257 assert.NoError(t, err) 258 }) 259 t.Run("accumulating lag, expecting throttler push back", func(t *testing.T) { 260 time.Sleep(2 * throttlerThreshold) 261 262 resp, err := throttleCheck(primaryTablet, false) 263 require.NoError(t, err) 264 defer resp.Body.Close() 265 assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode) 266 }) 267 t.Run("primary self-check should still be fine", func(t *testing.T) { 268 resp, err := throttleCheckSelf(primaryTablet) 269 require.NoError(t, err) 270 defer resp.Body.Close() 271 // self (on primary) is unaffected by replication lag 272 assert.Equal(t, http.StatusOK, resp.StatusCode) 273 }) 274 t.Run("replica self-check should show error", func(t *testing.T) { 275 resp, err := throttleCheckSelf(replicaTablet) 276 require.NoError(t, err) 277 defer resp.Body.Close() 278 assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode) 279 }) 280 t.Run("starting replication", func(t *testing.T) { 281 err := clusterInstance.VtctlclientProcess.ExecuteCommand("StartReplication", replicaTablet.Alias) 282 assert.NoError(t, err) 283 }) 284 t.Run("expecting replication to catch up and throttler check to return OK", func(t *testing.T) { 285 waitForThrottleCheckStatus(t, primaryTablet, http.StatusOK) 286 }) 287 t.Run("primary self-check should be fine", func(t *testing.T) { 288 resp, err := throttleCheckSelf(primaryTablet) 289 require.NoError(t, err) 290 defer resp.Body.Close() 291 // self (on primary) is unaffected by replication lag 292 assert.Equal(t, http.StatusOK, resp.StatusCode) 293 }) 294 t.Run("replica self-check should be fine", func(t *testing.T) { 295 resp, err := throttleCheckSelf(replicaTablet) 296 require.NoError(t, err) 297 defer resp.Body.Close() 298 assert.Equal(t, http.StatusOK, resp.StatusCode) 299 }) 300 } 301 302 func TestNoReplicas(t *testing.T) { 303 defer cluster.PanicHandler(t) 304 t.Run("changing replica to RDONLY", func(t *testing.T) { 305 err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replicaTablet.Alias, "RDONLY") 306 assert.NoError(t, err) 307 308 // This makes no REPLICA servers available. We expect something like: 309 // {"StatusCode":200,"Value":0,"Threshold":1,"Message":""} 310 waitForThrottleCheckStatus(t, primaryTablet, http.StatusOK) 311 }) 312 t.Run("restoring to REPLICA", func(t *testing.T) { 313 314 err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replicaTablet.Alias, "REPLICA") 315 assert.NoError(t, err) 316 317 waitForThrottleCheckStatus(t, primaryTablet, http.StatusOK) 318 }) 319 }