vitess.io/vitess@v0.16.2/go/vt/throttler/demo/throttler_demo.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "context" 21 "math/rand" 22 "net/http" 23 "sync" 24 "testing" 25 "time" 26 27 "github.com/spf13/pflag" 28 29 "vitess.io/vitess/go/vt/discovery" 30 "vitess.io/vitess/go/vt/log" 31 "vitess.io/vitess/go/vt/logutil" 32 querypb "vitess.io/vitess/go/vt/proto/query" 33 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 34 "vitess.io/vitess/go/vt/servenv" 35 "vitess.io/vitess/go/vt/throttler" 36 "vitess.io/vitess/go/vt/topo" 37 "vitess.io/vitess/go/vt/topo/memorytopo" 38 "vitess.io/vitess/go/vt/vttablet/grpcqueryservice" 39 "vitess.io/vitess/go/vt/vttablet/queryservice/fakes" 40 "vitess.io/vitess/go/vt/vttablet/tmclient" 41 "vitess.io/vitess/go/vt/wrangler" 42 "vitess.io/vitess/go/vt/wrangler/testlib" 43 ) 44 45 // This file contains a demo binary that demonstrates how the resharding 46 // throttler adapts its throttling rate to the replication lag. 47 // 48 // The throttler is necessary because replicas apply transactions at a slower 49 // rate than primaries and fall behind at high write throughput. 50 // (Mostly they fall behind because MySQL replication is single threaded but 51 // the write throughput on the primary does not have to.) 52 // 53 // This demo simulates a client (writer), a primary and a replica. 54 // The client writes to the primary which in turn replicas everything to the 55 // replica. 56 // The replica measures its replication lag via the timestamp which is part of 57 // each message. 58 // While the primary has no rate limit, the replica is limited to 59 // --rate (see below) transactions/second. The client runs the resharding 60 // throttler which tries to throttle the client based on the observed 61 // replication lag. 62 63 var ( 64 rate = int64(1000) 65 duration = 600 * time.Second 66 lagUpdateInterval = 5 * time.Second 67 replicaDegrationDuration = 10 * time.Second 68 replicaDegrationInterval time.Duration 69 ) 70 71 const flagSetName = "throttler_demo" 72 73 func registerDemoFlags(fs *pflag.FlagSet) { 74 fs.Int64Var(&rate, "rate", rate, "maximum rate of the throttled demo server at the start") 75 fs.DurationVar(&duration, "duration", duration, "total duration the demo runs") 76 fs.DurationVar(&lagUpdateInterval, "lag_update_interval", lagUpdateInterval, "interval at which the current replication lag will be broadcast to the throttler") 77 fs.DurationVar(&replicaDegrationInterval, "replica_degration_interval", replicaDegrationInterval, "simulate a throughput degration of the replica every X interval (i.e. the replica applies transactions at a slower rate for -reparent_duration and the replication lag might go up)") 78 fs.DurationVar(&replicaDegrationDuration, "replica_degration_duration", replicaDegrationDuration, "duration a simulated degration should take") 79 } 80 81 // primary simulates an *unthrottled* MySQL primary which replicates every 82 // received "execute" call to a known "replica". 83 type primary struct { 84 replica *replica 85 } 86 87 // execute is the simulated RPC which is called by the client. 88 func (m *primary) execute(msg time.Time) { 89 m.replica.replicate(msg) 90 } 91 92 // replica simulates a *throttled* MySQL replica. 93 // If it cannot keep up with applying the primary writes, it will report a 94 // replication lag > 0 seconds. 95 type replica struct { 96 fakeTablet *testlib.FakeTablet 97 qs *fakes.StreamHealthQueryService 98 99 // replicationStream is the incoming stream of messages from the primary. 100 replicationStream chan time.Time 101 102 // throttler is used to enforce the maximum rate at which replica applies 103 // transactions. It must not be confused with the client's throttler. 104 throttler *throttler.Throttler 105 lastHealthUpdate time.Time 106 lagUpdateInterval time.Duration 107 108 degrationInterval time.Duration 109 degrationDuration time.Duration 110 nextDegration time.Time 111 currentDegrationEnd time.Time 112 113 stopChan chan struct{} 114 wg sync.WaitGroup 115 } 116 117 func newReplica(lagUpdateInterval, degrationInterval, degrationDuration time.Duration, ts *topo.Server) *replica { 118 t := &testing.T{} 119 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 120 fakeTablet := testlib.NewFakeTablet(t, wr, "cell1", 0, 121 topodatapb.TabletType_REPLICA, nil, testlib.TabletKeyspaceShard(t, "ks", "-80")) 122 fakeTablet.StartActionLoop(t, wr) 123 124 target := &querypb.Target{ 125 Keyspace: "ks", 126 Shard: "-80", 127 TabletType: topodatapb.TabletType_REPLICA, 128 } 129 qs := fakes.NewStreamHealthQueryService(target) 130 grpcqueryservice.Register(fakeTablet.RPCServer, qs) 131 132 throttler, err := throttler.NewThrottler("replica", "TPS", 1, rate, throttler.ReplicationLagModuleDisabled) 133 if err != nil { 134 log.Fatal(err) 135 } 136 137 var nextDegration time.Time 138 if degrationInterval != time.Duration(0) { 139 nextDegration = time.Now().Add(degrationInterval) 140 } 141 r := &replica{ 142 fakeTablet: fakeTablet, 143 qs: qs, 144 throttler: throttler, 145 replicationStream: make(chan time.Time, 1*1024*1024), 146 lagUpdateInterval: lagUpdateInterval, 147 degrationInterval: degrationInterval, 148 degrationDuration: degrationDuration, 149 nextDegration: nextDegration, 150 stopChan: make(chan struct{}), 151 } 152 r.wg.Add(1) 153 go r.processReplicationStream() 154 return r 155 } 156 157 func (r *replica) replicate(msg time.Time) { 158 r.replicationStream <- msg 159 } 160 161 func (r *replica) processReplicationStream() { 162 defer r.wg.Done() 163 164 // actualRate counts the number of requests per r.lagUpdateInterval. 165 actualRate := 0 166 for msg := range r.replicationStream { 167 select { 168 case <-r.stopChan: 169 return 170 default: 171 } 172 173 now := time.Now() 174 if now.Sub(r.lastHealthUpdate) > r.lagUpdateInterval { 175 // Broadcast current lag every "lagUpdateInterval". 176 // 177 // Use integer values to calculate the lag. In consequence, the reported 178 // lag will constantly vary between the floor and ceil value e.g. 179 // an actual lag of 0.5s could be reported as 0s or 1s based on the 180 // truncation of the two times. 181 lagTruncated := uint32(now.Unix() - msg.Unix()) 182 // Display lag with a higher precision as well. 183 lag := now.Sub(msg).Seconds() 184 log.Infof("current lag: %1ds (%1.1fs) replica rate: % 7.1f chan len: % 6d", lagTruncated, lag, float64(actualRate)/r.lagUpdateInterval.Seconds(), len(r.replicationStream)) 185 r.qs.AddHealthResponseWithReplicationLag(lagTruncated) 186 r.lastHealthUpdate = now 187 actualRate = 0 188 } 189 if !r.nextDegration.IsZero() && time.Now().After(r.nextDegration) && r.currentDegrationEnd.IsZero() { 190 degradedRate := rand.Int63n(rate) 191 log.Infof("degrading the replica for %.f seconds from %v TPS to %v", r.degrationDuration.Seconds(), rate, degradedRate) 192 r.throttler.SetMaxRate(degradedRate) 193 r.currentDegrationEnd = time.Now().Add(r.degrationDuration) 194 } 195 if !r.currentDegrationEnd.IsZero() && time.Now().After(r.currentDegrationEnd) { 196 log.Infof("degrading the replica stopped. Restoring TPS to: %v", rate) 197 r.throttler.SetMaxRate(rate) 198 r.currentDegrationEnd = time.Time{} 199 r.nextDegration = time.Now().Add(r.degrationInterval) 200 } 201 202 for { 203 backoff := r.throttler.Throttle(0 /* threadID */) 204 if backoff == throttler.NotThrottled { 205 break 206 } 207 time.Sleep(backoff) 208 } 209 actualRate++ 210 } 211 } 212 213 func (r *replica) stop() { 214 close(r.replicationStream) 215 close(r.stopChan) 216 log.Info("Triggered replica shutdown. Waiting for it to stop.") 217 r.wg.Wait() 218 r.fakeTablet.StopActionLoop(&testing.T{}) 219 } 220 221 // client simulates a client which should throttle itself based on the 222 // replication lag of all replicas. 223 type client struct { 224 primary *primary 225 226 healthCheck discovery.HealthCheck 227 throttler *throttler.Throttler 228 229 stopChan chan struct{} 230 wg sync.WaitGroup 231 healthcheckCh chan *discovery.TabletHealth 232 } 233 234 func newClient(primary *primary, replica *replica, ts *topo.Server) *client { 235 t, err := throttler.NewThrottler("client", "TPS", 1, throttler.MaxRateModuleDisabled, 5 /* seconds */) 236 if err != nil { 237 log.Fatal(err) 238 } 239 240 healthCheck := discovery.NewHealthCheck(context.Background(), 5*time.Second, 1*time.Minute, ts, "cell1", "") 241 c := &client{ 242 primary: primary, 243 healthCheck: healthCheck, 244 throttler: t, 245 stopChan: make(chan struct{}), 246 } 247 healthcheckCh := c.healthCheck.Subscribe() 248 c.healthcheckCh = healthcheckCh 249 c.healthCheck.AddTablet(replica.fakeTablet.Tablet) 250 return c 251 } 252 253 func (c *client) run() { 254 c.wg.Add(1) 255 go c.loop() 256 } 257 258 func (c *client) loop() { 259 defer c.wg.Done() 260 261 for { 262 select { 263 case <-c.stopChan: 264 return 265 case th := <-c.healthcheckCh: 266 c.StatsUpdate(th) 267 default: 268 } 269 270 for { 271 backoff := c.throttler.Throttle(0 /* threadID */) 272 if backoff == throttler.NotThrottled { 273 break 274 } 275 time.Sleep(backoff) 276 } 277 278 c.primary.execute(time.Now()) 279 } 280 } 281 282 func (c *client) stop() { 283 close(c.stopChan) 284 c.wg.Wait() 285 286 c.healthCheck.Close() 287 c.throttler.Close() 288 } 289 290 // StatsUpdate gets called by the healthCheck instance every time a tablet broadcasts 291 // a health update. 292 func (c *client) StatsUpdate(ts *discovery.TabletHealth) { 293 // Ignore unless REPLICA or RDONLY. 294 if ts.Target.TabletType != topodatapb.TabletType_REPLICA && ts.Target.TabletType != topodatapb.TabletType_RDONLY { 295 return 296 } 297 298 c.throttler.RecordReplicationLag(time.Now(), ts) 299 } 300 301 func main() { 302 servenv.ParseFlags(flagSetName) 303 304 go servenv.RunDefault() 305 http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 306 http.Redirect(w, r, "/throttlerz", http.StatusTemporaryRedirect) 307 }) 308 309 log.Infof("start rate set to: %v", rate) 310 ts := memorytopo.NewServer("cell1") 311 replica := newReplica(lagUpdateInterval, replicaDegrationInterval, replicaDegrationDuration, ts) 312 primary := &primary{replica: replica} 313 client := newClient(primary, replica, ts) 314 client.run() 315 316 time.Sleep(duration) 317 client.stop() 318 replica.stop() 319 } 320 321 func init() { 322 servenv.RegisterDefaultFlags() 323 servenv.RegisterFlags() 324 servenv.RegisterGRPCServerFlags() 325 servenv.RegisterGRPCServerAuthFlags() 326 servenv.OnParseFor(flagSetName, registerDemoFlags) 327 }