vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/messager/message_manager.go

/*
Copyright 2019 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package messager

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"math/rand"
	"sync"
	"time"

	"vitess.io/vitess/go/mysql"
	"vitess.io/vitess/go/sqltypes"
	"vitess.io/vitess/go/stats"
	"vitess.io/vitess/go/sync2"
	"vitess.io/vitess/go/timer"
	"vitess.io/vitess/go/vt/log"
	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
	querypb "vitess.io/vitess/go/vt/proto/query"
	"vitess.io/vitess/go/vt/sqlparser"
	"vitess.io/vitess/go/vt/vtgate/evalengine"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/schema"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
)

var (
	// MessageStats tracks stats for messages.
	MessageStats = stats.NewGaugesWithMultiLabels(
		"Messages",
		"Stats for messages",
		[]string{"TableName", "Metric"})
)

// QueryGenerator generates the queries used to ack, postpone, and purge messages.
type QueryGenerator interface {
	GenerateAckQuery(ids []string) (string, map[string]*querypb.BindVariable)
	GeneratePostponeQuery(ids []string) (string, map[string]*querypb.BindVariable)
	GeneratePurgeQuery(timeCutoff int64) (string, map[string]*querypb.BindVariable)
}

type messageReceiver struct {
	ctx     context.Context
	errChan chan error
	send    func(*sqltypes.Result) error
	cancel  context.CancelFunc
}

func newMessageReceiver(ctx context.Context, send func(*sqltypes.Result) error) (*messageReceiver, <-chan struct{}) {
	ctx, cancel := context.WithCancel(ctx)
	rcv := &messageReceiver{
		ctx:     ctx,
		errChan: make(chan error, 1),
		send:    send,
		cancel:  cancel,
	}
	return rcv, ctx.Done()
}

func (rcv *messageReceiver) Send(qr *sqltypes.Result) error {
	// We have to use a channel so we can also
	// monitor the context.
	go func() {
		rcv.errChan <- rcv.send(qr)
	}()
	select {
	case <-rcv.ctx.Done():
		return io.EOF
	case err := <-rcv.errChan:
		if err == io.EOF {
			// This is only a failsafe. If we received an EOF,
			// grpc would have already canceled the context.
			rcv.cancel()
		}
		return err
	}
}

// receiverWithStatus is a separate struct to signify
// that the busy flag is controlled by the messageManager
// mutex.
type receiverWithStatus struct {
	receiver *messageReceiver
	busy     bool
}

// messageManager manages messages for a message table.
//
// messageManager has three core components that interact with each other.
// 1. The cache: this is essentially the send queue. Its size is limited by the
// number of rows.
// 2. The send loop: this loop pulls items out of the cache and sends them to the
// various clients.
// 3. The poller: this wakes up periodically to fill the cache with values by
// reading the message table from the database.
//
// The message manager operates in three modes:
//
// Idle mode
// This mode is entered if there is no client connected or if there are
// no outstanding messages to be sent. In this mode:
// The cache is empty.
// The send loop is in a cond.Wait state doing nothing.
// The poller wakes up periodically, but terminates immediately.
// Idle mode is exited when there is at least one client connected
// and there are pending messages to be sent.
//
// Steady state mode
// In this mode, there are connected clients and there is a continuous
// stream of messages being sent. The cache is not full, and there are
// occasional resends.
// Message creation inserts rows to the database and adds them to the cache.
// Each addition makes sure the send loop is awakened if it's waiting.
// The send loop continuously pulls items out of the cache and sends them.
// After every successful send, the messages are postponed, and are
// also removed from the cache.
// The poller wakes up periodically, loads messages that are due and adds them
// to the cache. Most of these items are likely to be those that did not
// receive a timely ack.
//
// messagesPending mode
// This mode is a variation of the steady state mode. This mode is
// entered when there are outstanding items in the database that need to be sent
// but are not present in the cache. This state can be entered in one
// of two ways:
// 1. The poller read returns as many rows as the cache size.
// 2. The Add of a message fails (cache full). This is invoked from the vstream.
// In either of the above cases, the messagesPending flag gets turned on.
// In this phase, the send loop proactively wakes up the poller every time
// it clears the cache.
// The system exits the messagesPending state if the number of items the poller
// loads is less than the cache size and all cache adds are successful.
// If so, the system reverts to the steady state mode.
//
// Rate limiting
// There are two ways for the system to rate-limit:
// 1. Client ingestion rate: if clients ingest messages slowly, the senders
// wait on them before sending more messages.
// 2. Postpone rate limiting: a client is considered to be non-busy only
// after it has postponed the message it has sent. This way, if postpones
// are too slow, the clients become less available and essentially
// limit the send rate to how fast messages can be postponed.
// The postpone functions also need to obtain a semaphore that limits
// the number of tx pool connections they can occupy.
//
// Client load balancing
// The messages are sent to the clients in a round-robin fashion.
// If, for some reason, a client is closed, the load balancer resets
// by starting with the first non-busy client.
//
// The Purge thread
// This thread is mostly independent. It wakes up periodically
// to delete old rows that were successfully acked.
type messageManager struct {
	tsv TabletService
	vs  VStreamer

	name         sqlparser.IdentifierCS
	fieldResult  *sqltypes.Result
	ackWaitTime  time.Duration
	purgeAfter   time.Duration
	minBackoff   time.Duration
	maxBackoff   time.Duration
	batchSize    int
	pollerTicks  *timer.Timer
	purgeTicks   *timer.Timer
	postponeSema *sync2.Semaphore

	mu     sync.Mutex
	isOpen bool
	// cond waits on curReceiver == -1 || cache.IsEmpty():
	// No current receivers available or cache is empty.
	cond            sync.Cond
	cache           *cache
	receivers       []*receiverWithStatus
	curReceiver     int
	messagesPending bool
	// streamCancel is set when a vstream is running, and is reset
	// to nil after a cancel. This allows for startVStream and stopVStream
	// to be idempotent.
	// This is implicitly protected by the main mutex because startVStream
	// and stopVStream are called while holding the main mutex.
	streamCancel func()

	// cacheManagementMu keeps the cache and database consistent with each
	// other by ensuring that only one of the streams is processing messages
	// and updating the cache at a time. The poller uses a results streamer to
	// pull directly from the message table and the message manager uses a
	// binlog streamer to process change events. This mutex ensures that only
	// one of them is updating the cache at any one time.
	// It prevents items from being removed from the cache while the poller
	// reads from the db and adds items to it. Otherwise, the poller
	// might add an older snapshot of a row that was just postponed.
	// It blocks the vstream (binlog streamer) from receiving messages while
	// the poller reads a snapshot and updates lastPollPosition. Any events
	// older than lastPollPosition must be ignored by the vstream. It
	// consequently also blocks the vstream from updating the cache while the
	// poller is active.
	// TODO(mattlord): since this is primarily a flow control mechanism, we
	// should do it in a more idiomatic go way using channels or cond vars.
	cacheManagementMu sync.Mutex
	// The lastPollPosition variable is the main point of coordination
	// between the poller and the binlog streamer to ensure that we are
	// not re-processing older events and are moving along linearly in the
	// shared virtual GTID stream within the message manager.
	// It is theoretically possible for the binlog streamer to be ahead of
	// the lastPollPosition. This is because of how semi-sync works today,
	// where a replica could have received and processed a GTID that the primary
	// may not have yet committed; but this is harmless because any events missed
	// will be picked up during the next poller run.
	lastPollPosition *mysql.Position

	// wg is for ensuring all running goroutines have returned
	// before we can close the manager. You need to Add before
	// launching any goroutine while holding a lock on mu.
	// The goroutine must in turn defer on Done.
	wg sync.WaitGroup

	vsFilter                  *binlogdatapb.Filter
	readByPriorityAndTimeNext *sqlparser.ParsedQuery
	ackQuery                  *sqlparser.ParsedQuery
	postponeQuery             *sqlparser.ParsedQuery
	purgeQuery                *sqlparser.ParsedQuery
}

// newMessageManager creates a new message manager.
// Calls into tsv have to be made asynchronously. Otherwise,
// it can lead to deadlocks.
func newMessageManager(tsv TabletService, vs VStreamer, table *schema.Table, postponeSema *sync2.Semaphore) *messageManager {
	mm := &messageManager{
		tsv:  tsv,
		vs:   vs,
		name: table.Name,
		fieldResult: &sqltypes.Result{
			Fields: table.MessageInfo.Fields,
		},
		ackWaitTime:     table.MessageInfo.AckWaitDuration,
		purgeAfter:      table.MessageInfo.PurgeAfterDuration,
		minBackoff:      table.MessageInfo.MinBackoff,
		maxBackoff:      table.MessageInfo.MaxBackoff,
		batchSize:       table.MessageInfo.BatchSize,
		cache:           newCache(table.MessageInfo.CacheSize),
		pollerTicks:     timer.NewTimer(table.MessageInfo.PollInterval),
		purgeTicks:      timer.NewTimer(table.MessageInfo.PollInterval),
		postponeSema:    postponeSema,
		messagesPending: true,
	}
	mm.cond.L = &mm.mu

	columnList := buildSelectColumnList(table)
	vsQuery := fmt.Sprintf("select priority, time_next, epoch, time_acked, %s from %v", columnList, mm.name)
	mm.vsFilter = &binlogdatapb.Filter{
		Rules: []*binlogdatapb.Rule{{
			Match:  table.Name.String(),
			Filter: vsQuery,
		}},
	}
	mm.readByPriorityAndTimeNext = sqlparser.BuildParsedQuery(
		// There should be a poller_idx defined on (time_acked, priority, time_next desc)
		// for this to be as efficient as possible.
		"select priority, time_next, epoch, time_acked, %s from %v where time_acked is null and time_next < %a order by priority, time_next desc limit %a",
		columnList, mm.name, ":time_next", ":max")
	mm.ackQuery = sqlparser.BuildParsedQuery(
		"update %v set time_acked = %a, time_next = null where id in %a and time_acked is null",
		mm.name, ":time_acked", "::ids")
	mm.purgeQuery = sqlparser.BuildParsedQuery(
		"delete from %v where time_acked < %a limit 500", mm.name, ":time_acked")

	mm.postponeQuery = buildPostponeQuery(mm.name, mm.minBackoff, mm.maxBackoff)

	return mm
}

func buildPostponeQuery(name sqlparser.IdentifierCS, minBackoff, maxBackoff time.Duration) *sqlparser.ParsedQuery {
	var args []any

	// Since messages are immediately postponed upon sending, we need to add exponential backoff on top
	// of the ackWaitTime; otherwise messages will be resent too quickly.
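	//
	// For reference, in the maxBackoff == 0 case the text assembled below works out
	// to roughly the following (illustrative only; the actual string is produced by
	// BuildParsedQuery from buf and args, and <table> stands for the message table name):
	//
	//   update <table> set
	//     time_next = :time_now + :wait_time +
	//       IF(FLOOR((:min_backoff<<ifnull(epoch, 0)) * :jitter) < :min_backoff,
	//          :min_backoff,
	//          FLOOR((:min_backoff<<ifnull(epoch, 0)) * :jitter)),
	//     epoch = ifnull(epoch, 0)+1
	//   where id in ::ids and time_acked is null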
	buf := bytes.NewBufferString("update %v set time_next = %a + %a + ")
	args = append(args, name, ":time_now", ":wait_time")

	// Have the backoff vary by +/- 33% (the :jitter bind var). Whenever this
	// expression is injected into the query, append (:min_backoff, :jitter) to args.
	jitteredBackoff := "FLOOR((%a<<ifnull(epoch, 0)) * %a)"

	//
	// if the jittered backoff is less than min_backoff, just set it to :min_backoff
	//
	buf.WriteString(fmt.Sprintf("IF(%s < %%a, %%a, ", jitteredBackoff))
	// jitteredBackoff < :min_backoff
	args = append(args, ":min_backoff", ":jitter", ":min_backoff")
	// if it is less, then use :min_backoff
	args = append(args, ":min_backoff")

	// now we are setting the false case on the above IF statement
	if maxBackoff == 0 {
		// if there is no max_backoff, just use jitteredBackoff
		buf.WriteString(jitteredBackoff)
		args = append(args, ":min_backoff", ":jitter")
	} else {
		// make sure that it doesn't exceed max_backoff
		buf.WriteString(fmt.Sprintf("IF(%s > %%a, %%a, %s)", jitteredBackoff, jitteredBackoff))
		// jitteredBackoff > :max_backoff
		args = append(args, ":min_backoff", ":jitter", ":max_backoff")
		// if it is greater, then use :max_backoff
		args = append(args, ":max_backoff")
		// otherwise just use jitteredBackoff
		args = append(args, ":min_backoff", ":jitter")
	}

	// close the if statement
	buf.WriteString(")")

	// now that we've identified time_next, finish the statement
	buf.WriteString(", epoch = ifnull(epoch, 0)+1 where id in %a and time_acked is null")
	args = append(args, "::ids")

	return sqlparser.BuildParsedQuery(buf.String(), args...)
}

// buildSelectColumnList is a convenience function that
// builds a 'select' list for the user-defined columns.
func buildSelectColumnList(t *schema.Table) string {
	buf := sqlparser.NewTrackedBuffer(nil)
	for i, c := range t.MessageInfo.Fields {
		// Column names may have to be escaped.
		if i == 0 {
			buf.Myprintf("%v", sqlparser.NewIdentifierCI(c.Name))
		} else {
			buf.Myprintf(", %v", sqlparser.NewIdentifierCI(c.Name))
		}
	}
	return buf.String()
}

// Open starts the messageManager service.
func (mm *messageManager) Open() {
	mm.mu.Lock()
	defer mm.mu.Unlock()
	if mm.isOpen {
		return
	}
	mm.isOpen = true
	mm.curReceiver = -1

	mm.wg.Add(1)
	go mm.runSend() // calls the offsetting mm.wg.Done()
	// TODO(sougou): improve ticks to add randomness.
	mm.pollerTicks.Start(mm.runPoller)
	mm.purgeTicks.Start(mm.runPurge)
}

// Close stops the messageManager service.
func (mm *messageManager) Close() {
	log.Infof("messageManager - started execution of Close")
	mm.pollerTicks.Stop()
	mm.purgeTicks.Stop()
	log.Infof("messageManager - stopped the ticks. Acquiring mu Lock")

	mm.mu.Lock()
	log.Infof("messageManager - acquired mu Lock")
	if !mm.isOpen {
		log.Infof("messageManager - manager is not open")
		mm.mu.Unlock()
		return
	}
	mm.isOpen = false
	log.Infof("messageManager - cancelling all receivers")
	for _, rcvr := range mm.receivers {
		rcvr.receiver.cancel()
	}
	mm.receivers = nil
	MessageStats.Set([]string{mm.name.String(), "ClientCount"}, 0)
	log.Infof("messageManager - clearing cache")
	mm.cache.Clear()
	log.Infof("messageManager - sending a broadcast")
	// This broadcast will cause runSend to exit.
	mm.cond.Broadcast()
	log.Infof("messageManager - stopping VStream")
	mm.stopVStream()
	mm.mu.Unlock()

	log.Infof("messageManager - Waiting for the wait group")
	mm.wg.Wait()
	log.Infof("messageManager - closed")
}

// Subscribe registers the send function as a receiver of messages
// and returns a 'done' channel that will be closed when the subscription
// ends. There are many reasons for a subscription to end: a grpc context
// cancel or timeout, a tabletserver shutdown, etc.
func (mm *messageManager) Subscribe(ctx context.Context, send func(*sqltypes.Result) error) <-chan struct{} {
	receiver, done := newMessageReceiver(ctx, send)

	mm.mu.Lock()
	defer mm.mu.Unlock()
	if !mm.isOpen {
		receiver.cancel()
		return done
	}

	if err := receiver.Send(mm.fieldResult); err != nil {
		log.Errorf("Terminating connection due to error sending field info: %v", err)
		receiver.cancel()
		return done
	}

	withStatus := &receiverWithStatus{
		receiver: receiver,
	}
	if len(mm.receivers) == 0 {
		mm.startVStream()
	}
	mm.receivers = append(mm.receivers, withStatus)
	MessageStats.Set([]string{mm.name.String(), "ClientCount"}, int64(len(mm.receivers)))
	if mm.curReceiver == -1 {
		mm.rescanReceivers(-1)
	}

	// Track the context and unsubscribe if it gets canceled.
	go func() {
		<-done
		mm.unsubscribe(receiver)
	}()
	return done
}

func (mm *messageManager) unsubscribe(receiver *messageReceiver) {
	mm.mu.Lock()
	defer mm.mu.Unlock()
	for i, rcv := range mm.receivers {
		if rcv.receiver != receiver {
			continue
		}
		// Delete the item at the current position.
		n := len(mm.receivers)
		copy(mm.receivers[i:n-1], mm.receivers[i+1:n])
		mm.receivers = mm.receivers[0 : n-1]
		MessageStats.Set([]string{mm.name.String(), "ClientCount"}, int64(len(mm.receivers)))
		break
	}
	// curReceiver is obsolete. Recompute.
	mm.rescanReceivers(-1)
	// If there are no receivers, shut down the cache.
	if len(mm.receivers) == 0 {
		mm.stopVStream()
		mm.cache.Clear()
	}
}

// rescanReceivers finds the next available receiver
// using start as the starting point. If one is found,
// it sets curReceiver to that index. If curReceiver
// was previously -1, it broadcasts. If none is found,
// curReceiver is set to -1. If there's no starting point,
// start must be specified as -1.
func (mm *messageManager) rescanReceivers(start int) {
	cur := start
	for range mm.receivers {
		cur = (cur + 1) % len(mm.receivers)
		if !mm.receivers[cur].busy {
			if mm.curReceiver == -1 {
				mm.cond.Broadcast()
			}
			mm.curReceiver = cur
			return
		}
	}
	// Nothing was found.
	mm.curReceiver = -1
}

// Add adds the message to the cache. It returns true
// if successful. If the message is already present,
// it still returns true.
func (mm *messageManager) Add(mr *MessageRow) bool {
	mm.mu.Lock()
	defer mm.mu.Unlock()
	if len(mm.receivers) == 0 {
		return false
	}
	// If the cache is empty, we have to broadcast that we're not empty
	// anymore.
	if mm.cache.IsEmpty() {
		defer mm.cond.Broadcast()
	}
	if !mm.cache.Add(mr) {
		// Cache is full. Enter "messagesPending" mode.
		mm.messagesPending = true
		return false
	}
	return true
}

func (mm *messageManager) runSend() {
	defer func() {
		mm.tsv.LogError()
		mm.wg.Done()
	}()

	mm.mu.Lock()
	defer mm.mu.Unlock()

	for {
		// It's theoretically possible that this loop can keep going without
		// a wait. If so, the lock will never be released for other functions
		// like Close to take action. So, let's release and acquire the lock
		// to avoid starving other contenders.
		mm.mu.Unlock()
		mm.mu.Lock()

		var rows [][]sqltypes.Value
		for {
			if !mm.isOpen {
				return
			}

			// If the cache became empty, there are messages pending, and there are
			// subscribed receivers, we have to trigger the poller to fetch more.
			if mm.cache.IsEmpty() && mm.messagesPending && len(mm.receivers) != 0 {
				// Do this as a separate goroutine. Otherwise, this could cause
				// the following deadlock:
				// 1. runSend obtains a lock
				// 2. Poller gets triggered, and waits for lock.
				// 3. runSend calls this function, but the trigger will hang because
				//    this function cannot return until the poller returns.
				go mm.pollerTicks.Trigger()
			}

			// If there are no receivers or the cache is empty, we wait.
			if mm.curReceiver == -1 || mm.cache.IsEmpty() {
				mm.cond.Wait()
				continue
			}

			// Fetch rows from the cache.
			lateCount := int64(0)
			for i := 0; i < mm.batchSize; i++ {
				mr := mm.cache.Pop()
				if mr == nil {
					break
				}
				if mr.Epoch >= 1 {
					lateCount++
				}
				rows = append(rows, mr.Row)
			}
			MessageStats.Add([]string{mm.name.String(), "Delayed"}, lateCount)

			// If we have rows to send, break out of this loop.
			if rows != nil {
				break
			}
		}
		MessageStats.Add([]string{mm.name.String(), "Sent"}, int64(len(rows)))
		// If we're here, there is a current receiver and messages
		// to send. Reserve the receiver and find the next one.
		receiver := mm.receivers[mm.curReceiver]
		receiver.busy = true
		mm.rescanReceivers(mm.curReceiver)

		// Send the message asynchronously.
		mm.wg.Add(1)
		go mm.send(receiver, &sqltypes.Result{Rows: rows}) // calls the offsetting mm.wg.Done()
	}
}

func (mm *messageManager) send(receiver *receiverWithStatus, qr *sqltypes.Result) {
	defer func() {
		mm.tsv.LogError()
		mm.wg.Done()
	}()

	ids := make([]string, len(qr.Rows))
	for i, row := range qr.Rows {
		ids[i] = row[0].ToString()
	}

	defer func() {
		// Hold cacheManagementMu to prevent the ids from being discarded
		// if the poller is active. Otherwise, it could have read a
		// snapshot of a row before the postponement and requeue
		// the message.
		mm.cacheManagementMu.Lock()
		defer mm.cacheManagementMu.Unlock()
		mm.cache.Discard(ids)
	}()

	defer func() {
		mm.mu.Lock()
		defer mm.mu.Unlock()

		receiver.busy = false
		// Rescan if there were no previously available receivers
		// because the current receiver became non-busy.
		if mm.curReceiver == -1 {
			mm.rescanReceivers(-1)
		}
	}()

	if err := receiver.receiver.Send(qr); err != nil {
		// Log the error, but we still want to postpone the message.
		// Otherwise, if this is a chronic failure like "message too
		// big", we'll end up spamming non-stop.
		log.Errorf("Error sending messages: %v: %v", qr, err)
	}
	mm.postpone(mm.tsv, mm.ackWaitTime, ids)
}

func (mm *messageManager) postpone(tsv TabletService, ackWaitTime time.Duration, ids []string) {
	// Use the semaphore to limit parallelism.
	if !mm.postponeSema.Acquire() {
		// Unreachable.
		return
	}
	defer mm.postponeSema.Release()
	ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), ackWaitTime)
	defer cancel()
	if _, err := tsv.PostponeMessages(ctx, nil, mm, ids); err != nil {
		// This can happen during spikes. Record the incident for monitoring.
		MessageStats.Add([]string{mm.name.String(), "PostponeFailed"}, 1)
	}
}

func (mm *messageManager) startVStream() {
	if mm.streamCancel != nil {
		return
	}
	var ctx context.Context
	ctx, mm.streamCancel = context.WithCancel(tabletenv.LocalContext())
	go mm.runVStream(ctx)
}

func (mm *messageManager) stopVStream() {
	log.Infof("messageManager - calling stream cancel")
	if mm.streamCancel != nil {
		mm.streamCancel()
		mm.streamCancel = nil
	}
}

func (mm *messageManager) runVStream(ctx context.Context) {
	for {
		err := mm.runOneVStream(ctx)
		select {
		case <-ctx.Done():
			log.Info("Context canceled, exiting vstream")
			return
		default:
		}
		MessageStats.Add([]string{mm.name.String(), "VStreamFailed"}, 1)
		log.Infof("VStream ended: %v, retrying in 5 seconds", err)
		time.Sleep(5 * time.Second)
	}
}

// runOneVStream watches for any new rows or rows that have been modified.
// Whether it's an insert or an update, if the new value of the
// row indicates that the message is eligible to be sent, it's added to
// the cache.
// Deletes are ignored.
// If the poller updates lastPollPosition, then all GTIDs up to that
// point are deemed obsolete and are skipped.
func (mm *messageManager) runOneVStream(ctx context.Context) error {
	var curPos string
	var fields []*querypb.Field

	err := mm.vs.Stream(ctx, "current", nil, mm.vsFilter, func(events []*binlogdatapb.VEvent) error {
		// We need to get the flow control lock.
		mm.cacheManagementMu.Lock()
		defer mm.cacheManagementMu.Unlock()

		select {
		case <-ctx.Done():
			return io.EOF
		default:
		}

		mustSkip := func() (bool, error) {
			if mm.lastPollPosition == nil {
				return false, nil
			}
			if curPos == "" {
				return true, nil
			}
			cur, err := mysql.DecodePosition(curPos)
			if err != nil {
				return false, err
			}
			if cur.AtLeast(*mm.lastPollPosition) {
				mm.lastPollPosition = nil
				return false, nil
			}
			return true, nil
		}
		skipEvents, err := mustSkip()
		if err != nil {
			return err
		}
		var newPos string
		for _, ev := range events {
			switch ev.Type {
			case binlogdatapb.VEventType_FIELD:
				fields = ev.FieldEvent.Fields
			case binlogdatapb.VEventType_ROW:
				if skipEvents {
					continue
				}
				if err := mm.processRowEvent(fields, ev.RowEvent); err != nil {
					return err
				}
			case binlogdatapb.VEventType_GTID:
				newPos = ev.Gtid
			case binlogdatapb.VEventType_COMMIT, binlogdatapb.VEventType_DDL, binlogdatapb.VEventType_OTHER:
				// Update curPos only when the GTID concludes, which is through one
				// of the above events.
				curPos = newPos
				skipEvents, err = mustSkip()
				if err != nil {
					return err
				}
			}
		}
		return nil
	})
	return err
}

func (mm *messageManager) processRowEvent(fields []*querypb.Field, rowEvent *binlogdatapb.RowEvent) error {
	if fields == nil {
		// Unreachable.
		return fmt.Errorf("internal error: unexpected rows without fields")
	}

	now := time.Now().UnixNano()
	for _, rc := range rowEvent.RowChanges {
		if rc.After == nil {
			continue
		}
		row := sqltypes.MakeRowTrusted(fields, rc.After)
		mr, err := BuildMessageRow(row)
		if err != nil {
			return err
		}
		if mr.TimeAcked != 0 || mr.TimeNext > now {
			continue
		}
		mm.Add(mr)
	}
	return nil
}

func (mm *messageManager) runPoller() {
	// We need to get the flow control lock first.
	mm.cacheManagementMu.Lock()
	defer mm.cacheManagementMu.Unlock()
	// Now we can get the main/structure lock and ensure e.g. that the
	// receiver count does not change during the run.
	mm.mu.Lock()
	defer mm.mu.Unlock()

	// Fast path. Skip all the work.
	if len(mm.receivers) == 0 {
		return
	}

	ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), mm.pollerTicks.Interval())
	defer func() {
		mm.tsv.LogError()
		cancel()
	}()

	size := mm.cache.Size()
	bindVars := map[string]*querypb.BindVariable{
		"time_next": sqltypes.Int64BindVariable(time.Now().UnixNano()),
		"max":       sqltypes.Int64BindVariable(int64(size)),
	}

	qr, err := mm.readPending(ctx, bindVars)
	if err != nil {
		return
	}

	mm.messagesPending = false
	if len(qr.Rows) >= size {
		// There are probably more messages to be sent.
		mm.messagesPending = true
	}
	if len(qr.Rows) != 0 {
		// We've most likely added items.
		// Wake up the sender.
		defer mm.cond.Broadcast()
	}
	for _, row := range qr.Rows {
		mr, err := BuildMessageRow(row)
		if err != nil {
			mm.tsv.Stats().InternalErrors.Add("Messages", 1)
			log.Errorf("Error reading message row: %v", err)
			continue
		}
		if !mm.cache.Add(mr) {
			mm.messagesPending = true
			return
		}
	}
}

func (mm *messageManager) runPurge() {
	go func() {
		ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), mm.purgeTicks.Interval())
		defer func() {
			mm.tsv.LogError()
			cancel()
		}()
		for {
			count, err := mm.tsv.PurgeMessages(ctx, nil, mm, time.Now().Add(-mm.purgeAfter).UnixNano())
			if err != nil {
				MessageStats.Add([]string{mm.name.String(), "PurgeFailed"}, 1)
				log.Errorf("Unable to delete messages: %v", err)
			} else {
				MessageStats.Add([]string{mm.name.String(), "Purged"}, count)
			}
			// If we deleted 500 or more, we should continue.
			if count < 500 {
				return
			}
		}
	}()
}

// GenerateAckQuery returns the query and bind vars for acking a message.
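// The returned query string is the ackQuery built in newMessageManager; for a
// message table <t> it expands to roughly:
//
//	update <t> set time_acked = :time_acked, time_next = null
//	where id in ::ids and time_acked is null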
func (mm *messageManager) GenerateAckQuery(ids []string) (string, map[string]*querypb.BindVariable) {
	idbvs := &querypb.BindVariable{
		Type:   querypb.Type_TUPLE,
		Values: make([]*querypb.Value, 0, len(ids)),
	}
	for _, id := range ids {
		idbvs.Values = append(idbvs.Values, &querypb.Value{
			Type:  querypb.Type_VARBINARY,
			Value: []byte(id),
		})
	}
	return mm.ackQuery.Query, map[string]*querypb.BindVariable{
		"time_acked": sqltypes.Int64BindVariable(time.Now().UnixNano()),
		"ids":        idbvs,
	}
}

// GeneratePostponeQuery returns the query and bind vars for postponing a message.
func (mm *messageManager) GeneratePostponeQuery(ids []string) (string, map[string]*querypb.BindVariable) {
	idbvs := &querypb.BindVariable{
		Type:   querypb.Type_TUPLE,
		Values: make([]*querypb.Value, 0, len(ids)),
	}
	for _, id := range ids {
		idbvs.Values = append(idbvs.Values, &querypb.Value{
			Type:  querypb.Type_VARBINARY,
			Value: []byte(id),
		})
	}

	bvs := map[string]*querypb.BindVariable{
		"time_now":    sqltypes.Int64BindVariable(time.Now().UnixNano()),
		"wait_time":   sqltypes.Int64BindVariable(int64(mm.ackWaitTime)),
		"min_backoff": sqltypes.Int64BindVariable(int64(mm.minBackoff)),
		"jitter":      sqltypes.Float64BindVariable(.666666 + rand.Float64()*.666666),
		"ids":         idbvs,
	}

	if mm.maxBackoff > 0 {
		bvs["max_backoff"] = sqltypes.Int64BindVariable(int64(mm.maxBackoff))
	}

	return mm.postponeQuery.Query, bvs
}

// GeneratePurgeQuery returns the query and bind vars for purging messages.
func (mm *messageManager) GeneratePurgeQuery(timeCutoff int64) (string, map[string]*querypb.BindVariable) {
	return mm.purgeQuery.Query, map[string]*querypb.BindVariable{
		"time_acked": sqltypes.Int64BindVariable(timeCutoff),
	}
}

// BuildMessageRow builds a MessageRow from a db row.
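// The first four columns are expected to be priority, time_next, epoch, and
// time_acked, matching the column order of the poller and vstream select
// queries; the remaining columns become MessageRow.Row, whose first value is
// assumed elsewhere (see send) to be the message id.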
func BuildMessageRow(row []sqltypes.Value) (*MessageRow, error) {
	mr := &MessageRow{Row: row[4:]}
	if !row[0].IsNull() {
		v, err := evalengine.ToInt64(row[0])
		if err != nil {
			return nil, err
		}
		mr.Priority = v
	}
	if !row[1].IsNull() {
		v, err := evalengine.ToInt64(row[1])
		if err != nil {
			return nil, err
		}
		mr.TimeNext = v
	}
	if !row[2].IsNull() {
		v, err := evalengine.ToInt64(row[2])
		if err != nil {
			return nil, err
		}
		mr.Epoch = v
	}
	if !row[3].IsNull() {
		v, err := evalengine.ToInt64(row[3])
		if err != nil {
			return nil, err
		}
		mr.TimeAcked = v
	}
	return mr, nil
}

func (mm *messageManager) readPending(ctx context.Context, bindVars map[string]*querypb.BindVariable) (*sqltypes.Result, error) {
	query, err := mm.readByPriorityAndTimeNext.GenerateQuery(bindVars, nil)
	if err != nil {
		mm.tsv.Stats().InternalErrors.Add("Messages", 1)
		log.Errorf("Error reading rows from message table: %v", err)
		return nil, err
	}
	qr := &sqltypes.Result{}
	err = mm.vs.StreamResults(ctx, query, func(response *binlogdatapb.VStreamResultsResponse) error {
		if response.Fields != nil {
			qr.Fields = response.Fields
		}
		if response.Gtid != "" {
			pos, err := mysql.DecodePosition(response.Gtid)
			if err != nil {
				return err
			}
			mm.lastPollPosition = &pos
		}
		for _, row := range response.Rows {
			qr.Rows = append(qr.Rows, sqltypes.MakeRowTrusted(qr.Fields, row))
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return qr, err
}

func (mm *messageManager) getReceiverCount() int {
	mm.mu.Lock()
	defer mm.mu.Unlock()
	return len(mm.receivers)
}

func (mm *messageManager) getLastPollPosition() *mysql.Position {
	mm.cacheManagementMu.Lock()
	defer mm.cacheManagementMu.Unlock()
	return mm.lastPollPosition
}