vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vstreamer 18 19 import ( 20 "bytes" 21 "context" 22 "fmt" 23 "io" 24 "time" 25 26 "google.golang.org/protobuf/encoding/prototext" 27 28 "vitess.io/vitess/go/mysql/collations" 29 "vitess.io/vitess/go/timer" 30 vtschema "vitess.io/vitess/go/vt/schema" 31 "vitess.io/vitess/go/vt/vterrors" 32 33 "vitess.io/vitess/go/mysql" 34 "vitess.io/vitess/go/sqltypes" 35 "vitess.io/vitess/go/sync2" 36 "vitess.io/vitess/go/vt/binlog" 37 "vitess.io/vitess/go/vt/dbconfigs" 38 "vitess.io/vitess/go/vt/log" 39 "vitess.io/vitess/go/vt/sqlparser" 40 "vitess.io/vitess/go/vt/vttablet/tabletserver/schema" 41 42 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 43 querypb "vitess.io/vitess/go/vt/proto/query" 44 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 45 ) 46 47 const ( 48 trxHistoryLenQuery = `select count as history_len from information_schema.INNODB_METRICS where name = 'trx_rseg_history_len'` 49 replicaLagQuery = `show slave status` 50 hostQuery = `select @@hostname as hostname, @@port as port` 51 ) 52 53 // HeartbeatTime is set to slightly below 1s, compared to idleTimeout 54 // set by VPlayer at slightly above 1s. This minimizes conflicts 55 // between the two timeouts. 56 var HeartbeatTime = 900 * time.Millisecond 57 58 // vschemaUpdateCount is for testing only. 59 // vstreamer is a mutex free data structure. So, it's not safe to access its members 60 // from a test. Since VSchema gets updated asynchronously, there's no way for a test 61 // to wait for it. Instead, the code that updates the vschema increments this atomic 62 // counter, which will let the tests poll for it to change. 63 // TODO(sougou): find a better way for this. 64 var vschemaUpdateCount sync2.AtomicInt64 65 66 // vstreamer is for serving a single vreplication stream on the source side. 67 type vstreamer struct { 68 ctx context.Context 69 cancel func() 70 71 cp dbconfigs.Connector 72 se *schema.Engine 73 startPos string 74 filter *binlogdatapb.Filter 75 send func([]*binlogdatapb.VEvent) error 76 77 vevents chan *localVSchema 78 vschema *localVSchema 79 plans map[uint64]*streamerPlan 80 journalTableID uint64 81 versionTableID uint64 82 83 // format and pos are updated by parseEvent. 84 format mysql.BinlogFormat 85 pos mysql.Position 86 stopPos string 87 88 phase string 89 vse *Engine 90 } 91 92 // streamerPlan extends the original plan to also include 93 // the TableMap, which comes from the binlog. It's used 94 // to extract values from the ROW events. 95 type streamerPlan struct { 96 *Plan 97 TableMap *mysql.TableMap 98 } 99 100 // newVStreamer creates a new vstreamer. 101 // cp: the mysql conn params. 102 // sh: the schema engine. The vstreamer uses it to convert the TableMap into field info. 103 // startPos: a flavor compliant position to stream from. This can also contain the special 104 // 105 // value "current", which means start from the current position. 106 // 107 // filter: the list of filtering rules. If a rule has a select expression for its filter, 108 // 109 // the select list can only reference direct columns. No other expressions are allowed. 110 // The select expression is allowed to contain the special 'keyspace_id()' function which 111 // will return the keyspace id of the row. Examples: 112 // "select * from t", same as an empty Filter, 113 // "select * from t where in_keyrange('-80')", same as "-80", 114 // "select * from t where in_keyrange(col1, 'hash', '-80')", 115 // "select col1, col2 from t where...", 116 // "select col1, keyspace_id() from t where...". 117 // Only "in_keyrange" and limited comparison operators (see enum Opcode in planbuilder.go) are supported in the where clause. 118 // Other constructs like joins, group by, etc. are not supported. 119 // 120 // vschema: the current vschema. This value can later be changed through the SetVSchema method. 121 // send: callback function to send events. 122 func newVStreamer(ctx context.Context, cp dbconfigs.Connector, se *schema.Engine, startPos string, stopPos string, filter *binlogdatapb.Filter, vschema *localVSchema, send func([]*binlogdatapb.VEvent) error, phase string, vse *Engine) *vstreamer { 123 ctx, cancel := context.WithCancel(ctx) 124 return &vstreamer{ 125 ctx: ctx, 126 cancel: cancel, 127 cp: cp, 128 se: se, 129 startPos: startPos, 130 stopPos: stopPos, 131 filter: filter, 132 send: send, 133 vevents: make(chan *localVSchema, 1), 134 vschema: vschema, 135 plans: make(map[uint64]*streamerPlan), 136 phase: phase, 137 vse: vse, 138 } 139 } 140 141 // SetVSchema updates the vstreamer against the new vschema. 142 func (vs *vstreamer) SetVSchema(vschema *localVSchema) { 143 // Since vs.Stream is a single-threaded loop. We just send an event to 144 // that thread, which helps us avoid mutexes to update the plans. 145 select { 146 case vs.vevents <- vschema: 147 case <-vs.ctx.Done(): 148 default: // if there is a pending vschema in the channel, drain it and update it with the latest one 149 select { 150 case <-vs.vevents: 151 vs.vevents <- vschema 152 default: 153 } 154 } 155 } 156 157 // Cancel stops the streaming. 158 func (vs *vstreamer) Cancel() { 159 vs.cancel() 160 } 161 162 // Stream streams binlog events. 163 func (vs *vstreamer) Stream() error { 164 //defer vs.cancel() 165 ctx := context.Background() 166 vs.vse.vstreamerCount.Add(1) 167 defer func() { 168 ctx.Done() 169 vs.vse.vstreamerCount.Add(-1) 170 }() 171 vs.vse.vstreamersCreated.Add(1) 172 log.Infof("Starting Stream() with startPos %s", vs.startPos) 173 pos, err := mysql.DecodePosition(vs.startPos) 174 if err != nil { 175 vs.vse.errorCounts.Add("StreamRows", 1) 176 vs.vse.vstreamersEndedWithErrors.Add(1) 177 return err 178 } 179 vs.pos = pos 180 return vs.replicate(ctx) 181 } 182 183 // Stream streams binlog events. 184 func (vs *vstreamer) replicate(ctx context.Context) error { 185 // Ensure se is Open. If vttablet came up in a non_serving role, 186 // the schema engine may not have been initialized. 187 if err := vs.se.Open(); err != nil { 188 return wrapError(err, vs.pos, vs.vse) 189 } 190 191 conn, err := binlog.NewBinlogConnection(vs.cp) 192 if err != nil { 193 return wrapError(err, vs.pos, vs.vse) 194 } 195 defer conn.Close() 196 197 events, errs, err := conn.StartBinlogDumpFromPosition(vs.ctx, "", vs.pos) 198 if err != nil { 199 return wrapError(err, vs.pos, vs.vse) 200 } 201 err = vs.parseEvents(vs.ctx, events, errs) 202 return wrapError(err, vs.pos, vs.vse) 203 } 204 205 // parseEvents parses and sends events. 206 func (vs *vstreamer) parseEvents(ctx context.Context, events <-chan mysql.BinlogEvent, errs <-chan error) error { 207 // bufferAndTransmit uses bufferedEvents and curSize to buffer events. 208 var ( 209 bufferedEvents []*binlogdatapb.VEvent 210 curSize int 211 ) 212 213 // Only the following patterns are possible: 214 // BEGIN->ROWs or Statements->GTID->COMMIT. In the case of large transactions, this can be broken into chunks. 215 // BEGIN->JOURNAL->GTID->COMMIT 216 // GTID->DDL 217 // GTID->OTHER 218 // HEARTBEAT is issued if there's inactivity, which is likely 219 // to heppend between one group of events and another. 220 // 221 // Buffering only takes row or statement lengths into consideration. 222 // Length of other events is considered negligible. 223 // If a new row event causes the packet size to be exceeded, 224 // all existing rows are sent without the new row. 225 // If a single row exceeds the packet size, it will be in its own packet. 226 bufferAndTransmit := func(vevent *binlogdatapb.VEvent) error { 227 vevent.Keyspace = vs.vse.keyspace 228 vevent.Shard = vs.vse.shard 229 230 switch vevent.Type { 231 case binlogdatapb.VEventType_GTID, binlogdatapb.VEventType_BEGIN, binlogdatapb.VEventType_FIELD, 232 binlogdatapb.VEventType_JOURNAL: 233 // We never have to send GTID, BEGIN, FIELD events on their own. 234 // A JOURNAL event is always preceded by a BEGIN and followed by a COMMIT. 235 // So, we don't have to send it right away. 236 bufferedEvents = append(bufferedEvents, vevent) 237 case binlogdatapb.VEventType_COMMIT, binlogdatapb.VEventType_DDL, binlogdatapb.VEventType_OTHER, 238 binlogdatapb.VEventType_HEARTBEAT, binlogdatapb.VEventType_VERSION: 239 // COMMIT, DDL, OTHER and HEARTBEAT must be immediately sent. 240 // Although unlikely, it's possible to get a HEARTBEAT in the middle 241 // of a transaction. If so, we still send the partial transaction along 242 // with the heartbeat. 243 bufferedEvents = append(bufferedEvents, vevent) 244 vevents := bufferedEvents 245 bufferedEvents = nil 246 curSize = 0 247 return vs.send(vevents) 248 case binlogdatapb.VEventType_INSERT, binlogdatapb.VEventType_DELETE, binlogdatapb.VEventType_UPDATE, binlogdatapb.VEventType_REPLACE: 249 newSize := len(vevent.GetDml()) 250 if curSize+newSize > defaultPacketSize { 251 vs.vse.vstreamerNumPackets.Add(1) 252 vevents := bufferedEvents 253 bufferedEvents = []*binlogdatapb.VEvent{vevent} 254 curSize = newSize 255 return vs.send(vevents) 256 } 257 curSize += newSize 258 bufferedEvents = append(bufferedEvents, vevent) 259 case binlogdatapb.VEventType_ROW: 260 // ROW events happen inside transactions. So, we can chunk them. 261 // Buffer everything until packet size is reached, and then send. 262 newSize := 0 263 for _, rowChange := range vevent.RowEvent.RowChanges { 264 if rowChange.Before != nil { 265 newSize += len(rowChange.Before.Values) 266 } 267 if rowChange.After != nil { 268 newSize += len(rowChange.After.Values) 269 } 270 } 271 if curSize+newSize > defaultPacketSize { 272 vs.vse.vstreamerNumPackets.Add(1) 273 vevents := bufferedEvents 274 bufferedEvents = []*binlogdatapb.VEvent{vevent} 275 curSize = newSize 276 return vs.send(vevents) 277 } 278 curSize += newSize 279 bufferedEvents = append(bufferedEvents, vevent) 280 default: 281 vs.vse.errorCounts.Add("BufferAndTransmit", 1) 282 return fmt.Errorf("unexpected event: %v", vevent) 283 } 284 return nil 285 } 286 287 // Main loop: calls bufferAndTransmit as events arrive. 288 hbTimer := time.NewTimer(HeartbeatTime) 289 defer hbTimer.Stop() 290 291 injectHeartbeat := func(throttled bool) error { 292 now := time.Now().UnixNano() 293 select { 294 case <-ctx.Done(): 295 return vterrors.Errorf(vtrpcpb.Code_CANCELED, "context has expired") 296 default: 297 err := bufferAndTransmit(&binlogdatapb.VEvent{ 298 Type: binlogdatapb.VEventType_HEARTBEAT, 299 Timestamp: now / 1e9, 300 CurrentTime: now, 301 Throttled: throttled, 302 }) 303 return err 304 } 305 } 306 307 throttleEvents := func(throttledEvents chan mysql.BinlogEvent) { 308 throttledHeartbeatsRateLimiter := timer.NewRateLimiter(HeartbeatTime) 309 for { 310 // check throttler. 311 if !vs.vse.throttlerClient.ThrottleCheckOKOrWait(ctx) { 312 // make sure to leave if context is cancelled 313 select { 314 case <-ctx.Done(): 315 return 316 default: 317 // do nothing special 318 } 319 throttledHeartbeatsRateLimiter.Do(func() error { 320 return injectHeartbeat(true) 321 }) 322 // we won't process events, until we're no longer throttling 323 continue 324 } 325 select { 326 case ev, ok := <-events: 327 if ok { 328 select { 329 case throttledEvents <- ev: 330 case <-ctx.Done(): 331 return 332 } 333 } else { 334 close(throttledEvents) 335 return 336 } 337 case <-ctx.Done(): 338 return 339 } 340 } 341 } 342 // throttledEvents can be read just like you would read from events 343 // throttledEvents pulls data from events, but throttles pulling data, 344 // which in turn blocks the BinlogConnection from pushing events to the channel 345 throttledEvents := make(chan mysql.BinlogEvent) 346 go throttleEvents(throttledEvents) 347 348 for { 349 hbTimer.Reset(HeartbeatTime) 350 // Drain event if timer fired before reset. 351 select { 352 case <-hbTimer.C: 353 default: 354 } 355 356 select { 357 case ev, ok := <-throttledEvents: 358 if !ok { 359 select { 360 case <-ctx.Done(): 361 return nil 362 default: 363 } 364 return fmt.Errorf("unexpected server EOF") 365 } 366 vevents, err := vs.parseEvent(ev) 367 if err != nil { 368 vs.vse.errorCounts.Add("ParseEvent", 1) 369 return err 370 } 371 for _, vevent := range vevents { 372 if err := bufferAndTransmit(vevent); err != nil { 373 if err == io.EOF { 374 return nil 375 } 376 vs.vse.errorCounts.Add("BufferAndTransmit", 1) 377 return fmt.Errorf("error sending event: %v", err) 378 } 379 } 380 case vs.vschema = <-vs.vevents: 381 select { 382 case <-ctx.Done(): 383 return nil 384 default: 385 if err := vs.rebuildPlans(); err != nil { 386 return err 387 } 388 // Increment this counter for testing. 389 vschemaUpdateCount.Add(1) 390 } 391 case err := <-errs: 392 return err 393 case <-ctx.Done(): 394 return nil 395 case <-hbTimer.C: 396 if err := injectHeartbeat(false); err != nil { 397 if err == io.EOF { 398 return nil 399 } 400 vs.vse.errorCounts.Add("Send", 1) 401 return fmt.Errorf("error sending event: %v", err) 402 } 403 } 404 } 405 } 406 407 // parseEvent parses an event from the binlog and converts it to a list of VEvents. 408 func (vs *vstreamer) parseEvent(ev mysql.BinlogEvent) ([]*binlogdatapb.VEvent, error) { 409 if !ev.IsValid() { 410 return nil, fmt.Errorf("can't parse binlog event: invalid data: %#v", ev) 411 } 412 413 // We need to keep checking for FORMAT_DESCRIPTION_EVENT even after we've 414 // seen one, because another one might come along (e.g. on log rotate due to 415 // binlog settings change) that changes the format. 416 if ev.IsFormatDescription() { 417 var err error 418 vs.format, err = ev.Format() 419 if err != nil { 420 return nil, fmt.Errorf("can't parse FORMAT_DESCRIPTION_EVENT: %v, event data: %#v", err, ev) 421 } 422 return nil, nil 423 } 424 425 // We can't parse anything until we get a FORMAT_DESCRIPTION_EVENT that 426 // tells us the size of the event header. 427 if vs.format.IsZero() { 428 // The only thing that should come before the FORMAT_DESCRIPTION_EVENT 429 // is a fake ROTATE_EVENT, which the primary sends to tell us the name 430 // of the current log file. 431 if ev.IsRotate() { 432 return nil, nil 433 } 434 return nil, fmt.Errorf("got a real event before FORMAT_DESCRIPTION_EVENT: %#v", ev) 435 } 436 437 // Strip the checksum, if any. We don't actually verify the checksum, so discard it. 438 ev, _, err := ev.StripChecksum(vs.format) 439 if err != nil { 440 return nil, fmt.Errorf("can't strip checksum from binlog event: %v, event data: %#v", err, ev) 441 } 442 443 var vevents []*binlogdatapb.VEvent 444 switch { 445 case ev.IsGTID(): 446 gtid, hasBegin, err := ev.GTID(vs.format) 447 if err != nil { 448 return nil, fmt.Errorf("can't get GTID from binlog event: %v, event data: %#v", err, ev) 449 } 450 if hasBegin { 451 vevents = append(vevents, &binlogdatapb.VEvent{ 452 Type: binlogdatapb.VEventType_BEGIN, 453 }) 454 } 455 vs.pos = mysql.AppendGTID(vs.pos, gtid) 456 case ev.IsXID(): 457 vevents = append(vevents, &binlogdatapb.VEvent{ 458 Type: binlogdatapb.VEventType_GTID, 459 Gtid: mysql.EncodePosition(vs.pos), 460 }, &binlogdatapb.VEvent{ 461 Type: binlogdatapb.VEventType_COMMIT, 462 }) 463 case ev.IsQuery(): 464 q, err := ev.Query(vs.format) 465 if err != nil { 466 return nil, fmt.Errorf("can't get query from binlog event: %v, event data: %#v", err, ev) 467 } 468 // Insert/Delete/Update are supported only to be used in the context of external mysql streams where source databases 469 // could be using SBR. Vitess itself will never run into cases where it needs to consume non rbr statements. 470 471 switch cat := sqlparser.Preview(q.SQL); cat { 472 case sqlparser.StmtInsert: 473 mustSend := mustSendStmt(q, vs.cp.DBName()) 474 if mustSend { 475 vevents = append(vevents, &binlogdatapb.VEvent{ 476 Type: binlogdatapb.VEventType_INSERT, 477 Dml: q.SQL, 478 }) 479 } 480 case sqlparser.StmtUpdate: 481 mustSend := mustSendStmt(q, vs.cp.DBName()) 482 if mustSend { 483 vevents = append(vevents, &binlogdatapb.VEvent{ 484 Type: binlogdatapb.VEventType_UPDATE, 485 Dml: q.SQL, 486 }) 487 } 488 case sqlparser.StmtDelete: 489 mustSend := mustSendStmt(q, vs.cp.DBName()) 490 if mustSend { 491 vevents = append(vevents, &binlogdatapb.VEvent{ 492 Type: binlogdatapb.VEventType_DELETE, 493 Dml: q.SQL, 494 }) 495 } 496 case sqlparser.StmtReplace: 497 mustSend := mustSendStmt(q, vs.cp.DBName()) 498 if mustSend { 499 vevents = append(vevents, &binlogdatapb.VEvent{ 500 Type: binlogdatapb.VEventType_REPLACE, 501 Dml: q.SQL, 502 }) 503 } 504 case sqlparser.StmtBegin: 505 vevents = append(vevents, &binlogdatapb.VEvent{ 506 Type: binlogdatapb.VEventType_BEGIN, 507 }) 508 case sqlparser.StmtCommit: 509 vevents = append(vevents, &binlogdatapb.VEvent{ 510 Type: binlogdatapb.VEventType_COMMIT, 511 }) 512 case sqlparser.StmtDDL: 513 if mustSendDDL(q, vs.cp.DBName(), vs.filter) { 514 vevents = append(vevents, &binlogdatapb.VEvent{ 515 Type: binlogdatapb.VEventType_GTID, 516 Gtid: mysql.EncodePosition(vs.pos), 517 }, &binlogdatapb.VEvent{ 518 Type: binlogdatapb.VEventType_DDL, 519 Statement: q.SQL, 520 }) 521 } else { 522 // If the DDL need not be sent, send a dummy OTHER event. 523 vevents = append(vevents, &binlogdatapb.VEvent{ 524 Type: binlogdatapb.VEventType_GTID, 525 Gtid: mysql.EncodePosition(vs.pos), 526 }, &binlogdatapb.VEvent{ 527 Type: binlogdatapb.VEventType_OTHER, 528 }) 529 } 530 if schema.MustReloadSchemaOnDDL(q.SQL, vs.cp.DBName()) { 531 vs.se.ReloadAt(context.Background(), vs.pos) 532 } 533 case sqlparser.StmtSavepoint: 534 // We currently completely skip `SAVEPOINT ...` statements. 535 // 536 // MySQL inserts `SAVEPOINT ...` statements into the binlog in row based, statement based 537 // and in mixed replication modes, but only ever writes `ROLLBACK TO ...` statements to the 538 // binlog in mixed or statement based replication modes. Without `ROLLBACK TO ...` statements, 539 // savepoints are side-effect free. 540 // 541 // Vitess only supports row based replication, so skipping the creation of savepoints 542 // reduces the amount of data send over to vplayer. 543 case sqlparser.StmtOther, sqlparser.StmtPriv, sqlparser.StmtSet, sqlparser.StmtComment, sqlparser.StmtFlush: 544 // These are either: 545 // 1) DBA statements like REPAIR that can be ignored. 546 // 2) Privilege-altering statements like GRANT/REVOKE 547 // that we want to keep out of the stream for now. 548 vevents = append(vevents, &binlogdatapb.VEvent{ 549 Type: binlogdatapb.VEventType_GTID, 550 Gtid: mysql.EncodePosition(vs.pos), 551 }, &binlogdatapb.VEvent{ 552 Type: binlogdatapb.VEventType_OTHER, 553 }) 554 default: 555 return nil, fmt.Errorf("unexpected statement type %s in row-based replication: %q", cat, q.SQL) 556 } 557 case ev.IsTableMap(): 558 // This is very frequent. It precedes every row event. 559 // If it's the first time for a table, we generate a FIELD 560 // event, and also cache the plan. Subsequent TableMap events 561 // for that table id don't generate VEvents. 562 // A schema change will result in a change in table id, which 563 // will generate a new plan and FIELD event. 564 id := ev.TableID(vs.format) 565 566 tm, err := ev.TableMap(vs.format) 567 if err != nil { 568 return nil, err 569 } 570 571 if plan, ok := vs.plans[id]; ok { 572 // When the underlying mysql server restarts the table map can change. 573 // Usually the vstreamer will also error out when this happens, and vstreamer re-initializes its table map. 574 // But if the vstreamer is not aware of the restart, we could get an id that matches one in the cache, but 575 // is for a different table. We then invalidate and recompute the plan for this id. 576 if plan == nil || plan.Table.Name == tm.Name { 577 return nil, nil 578 } 579 vs.plans[id] = nil 580 log.Infof("table map changed: id %d for %s has changed to %s", id, plan.Table.Name, tm.Name) 581 } 582 583 if tm.Database == "_vt" && tm.Name == "resharding_journal" { 584 // A journal is a special case that generates a JOURNAL event. 585 return nil, vs.buildJournalPlan(id, tm) 586 } else if tm.Database == "_vt" && tm.Name == "schema_version" && !vs.se.SkipMetaCheck { 587 // Generates a Version event when it detects that a schema is stored in the schema_version table. 588 return nil, vs.buildVersionPlan(id, tm) 589 } 590 if tm.Database != "" && tm.Database != vs.cp.DBName() { 591 vs.plans[id] = nil 592 return nil, nil 593 } 594 if vtschema.IsInternalOperationTableName(tm.Name) { // ignore tables created by onlineddl/gh-ost/pt-osc 595 vs.plans[id] = nil 596 return nil, nil 597 } 598 if !ruleMatches(tm.Name, vs.filter) { 599 return nil, nil 600 } 601 602 vevent, err := vs.buildTablePlan(id, tm) 603 if err != nil { 604 vs.vse.errorCounts.Add("TablePlan", 1) 605 return nil, err 606 } 607 if vevent != nil { 608 vevents = append(vevents, vevent) 609 } 610 case ev.IsWriteRows() || ev.IsDeleteRows() || ev.IsUpdateRows(): 611 // The existence of before and after images can be used to 612 // identify statement types. It's also possible that the 613 // before and after images end up going to different shards. 614 // If so, an update will be treated as delete on one shard 615 // and insert on the other. 616 id := ev.TableID(vs.format) 617 plan := vs.plans[id] 618 if plan == nil { 619 return nil, nil 620 } 621 rows, err := ev.Rows(vs.format, plan.TableMap) 622 if err != nil { 623 return nil, err 624 } 625 if id == vs.journalTableID { 626 vevents, err = vs.processJournalEvent(vevents, plan, rows) 627 } else if id == vs.versionTableID { 628 vs.se.RegisterVersionEvent() 629 vevent := &binlogdatapb.VEvent{ 630 Type: binlogdatapb.VEventType_VERSION, 631 } 632 vevents = append(vevents, vevent) 633 634 } else { 635 vevents, err = vs.processRowEvent(vevents, plan, rows) 636 } 637 if err != nil { 638 return nil, err 639 } 640 case ev.IsCompressed(): 641 log.Errorf("VReplication does not handle binlog compression") 642 return nil, fmt.Errorf("VReplication does not handle binlog compression") 643 } 644 for _, vevent := range vevents { 645 vevent.Timestamp = int64(ev.Timestamp()) 646 vevent.CurrentTime = time.Now().UnixNano() 647 } 648 return vevents, nil 649 } 650 651 func (vs *vstreamer) buildJournalPlan(id uint64, tm *mysql.TableMap) error { 652 conn, err := vs.cp.Connect(vs.ctx) 653 if err != nil { 654 return err 655 } 656 defer conn.Close() 657 qr, err := conn.ExecuteFetch("select * from _vt.resharding_journal where 1 != 1", 1, true) 658 if err != nil { 659 return err 660 } 661 fields := qr.Fields 662 if len(fields) < len(tm.Types) { 663 return fmt.Errorf("cannot determine table columns for %s: event has %v, schema has %v", tm.Name, tm.Types, fields) 664 } 665 table := &Table{ 666 Name: "_vt.resharding_journal", 667 Fields: fields[:len(tm.Types)], 668 } 669 // Build a normal table plan, which means, return all rows 670 // and columns as is. Special handling is done when we actually 671 // receive the row event. We'll build a JOURNAL event instead. 672 plan, err := buildREPlan(table, nil, "") 673 if err != nil { 674 return err 675 } 676 vs.plans[id] = &streamerPlan{ 677 Plan: plan, 678 TableMap: tm, 679 } 680 vs.journalTableID = id 681 return nil 682 } 683 684 func (vs *vstreamer) buildVersionPlan(id uint64, tm *mysql.TableMap) error { 685 conn, err := vs.cp.Connect(vs.ctx) 686 if err != nil { 687 return err 688 } 689 defer conn.Close() 690 qr, err := conn.ExecuteFetch("select * from _vt.schema_version where 1 != 1", 1, true) 691 if err != nil { 692 return err 693 } 694 fields := qr.Fields 695 if len(fields) < len(tm.Types) { 696 return fmt.Errorf("cannot determine table columns for %s: event has %v, schema has %v", tm.Name, tm.Types, fields) 697 } 698 table := &Table{ 699 Name: "_vt.schema_version", 700 Fields: fields[:len(tm.Types)], 701 } 702 // Build a normal table plan, which means, return all rows 703 // and columns as is. Special handling is done when we actually 704 // receive the row event. We'll build a JOURNAL event instead. 705 plan, err := buildREPlan(table, nil, "") 706 if err != nil { 707 return err 708 } 709 vs.plans[id] = &streamerPlan{ 710 Plan: plan, 711 TableMap: tm, 712 } 713 vs.versionTableID = id 714 return nil 715 } 716 717 func (vs *vstreamer) buildTablePlan(id uint64, tm *mysql.TableMap) (*binlogdatapb.VEvent, error) { 718 cols, err := vs.buildTableColumns(tm) 719 if err != nil { 720 return nil, err 721 } 722 723 table := &Table{ 724 Name: tm.Name, 725 Fields: cols, 726 } 727 plan, err := buildPlan(table, vs.vschema, vs.filter) 728 if err != nil { 729 return nil, err 730 } 731 if plan == nil { 732 vs.plans[id] = nil 733 return nil, nil 734 } 735 vs.plans[id] = &streamerPlan{ 736 Plan: plan, 737 TableMap: tm, 738 } 739 return &binlogdatapb.VEvent{ 740 Type: binlogdatapb.VEventType_FIELD, 741 FieldEvent: &binlogdatapb.FieldEvent{ 742 TableName: plan.Table.Name, 743 Fields: plan.fields(), 744 Keyspace: vs.vse.keyspace, 745 Shard: vs.vse.shard, 746 }, 747 }, nil 748 } 749 750 func (vs *vstreamer) buildTableColumns(tm *mysql.TableMap) ([]*querypb.Field, error) { 751 var fields []*querypb.Field 752 for i, typ := range tm.Types { 753 t, err := sqltypes.MySQLToType(int64(typ), 0) 754 if err != nil { 755 return nil, fmt.Errorf("unsupported type: %d, position: %d", typ, i) 756 } 757 fields = append(fields, &querypb.Field{ 758 Name: fmt.Sprintf("@%d", i+1), 759 Type: t, 760 }) 761 } 762 st, err := vs.se.GetTableForPos(sqlparser.NewIdentifierCS(tm.Name), mysql.EncodePosition(vs.pos)) 763 if err != nil { 764 if vs.filter.FieldEventMode == binlogdatapb.Filter_ERR_ON_MISMATCH { 765 log.Infof("No schema found for table %s", tm.Name) 766 return nil, fmt.Errorf("unknown table %v in schema", tm.Name) 767 } 768 return fields, nil 769 } 770 771 if len(st.Fields) < len(tm.Types) { 772 if vs.filter.FieldEventMode == binlogdatapb.Filter_ERR_ON_MISMATCH { 773 log.Infof("Cannot determine columns for table %s", tm.Name) 774 return nil, fmt.Errorf("cannot determine table columns for %s: event has %v, schema has %v", tm.Name, tm.Types, st.Fields) 775 } 776 return fields, nil 777 } 778 779 // check if the schema returned by schema.Engine matches with row. 780 for i := range tm.Types { 781 if !sqltypes.AreTypesEquivalent(fields[i].Type, st.Fields[i].Type) { 782 return fields, nil 783 } 784 } 785 786 // Columns should be truncated to match those in tm. 787 fields = st.Fields[:len(tm.Types)] 788 extColInfos, err := vs.getExtColInfos(tm.Name, tm.Database) 789 if err != nil { 790 return nil, err 791 } 792 for _, field := range fields { 793 // we want the MySQL column type info so that we can properly handle 794 // ambiguous binlog events and other cases where the internal types 795 // don't match the MySQL column type. One example being that in binlog 796 // events CHAR columns with a binary collation are indistinguishable 797 // from BINARY columns. 798 if extColInfo, ok := extColInfos[field.Name]; ok { 799 field.ColumnType = extColInfo.columnType 800 } 801 } 802 return fields, nil 803 } 804 805 // additional column attributes from information_schema.columns. Currently only column_type is used, but 806 // we expect to add more in the future 807 type extColInfo struct { 808 columnType string 809 } 810 811 func encodeString(in string) string { 812 buf := bytes.NewBuffer(nil) 813 sqltypes.NewVarChar(in).EncodeSQL(buf) 814 return buf.String() 815 } 816 817 func (vs *vstreamer) getExtColInfos(table, database string) (map[string]*extColInfo, error) { 818 extColInfos := make(map[string]*extColInfo) 819 conn, err := vs.cp.Connect(vs.ctx) 820 if err != nil { 821 return nil, err 822 } 823 defer conn.Close() 824 queryTemplate := "select column_name, column_type from information_schema.columns where table_schema=%s and table_name=%s;" 825 query := fmt.Sprintf(queryTemplate, encodeString(database), encodeString(table)) 826 qr, err := conn.ExecuteFetch(query, 10000, false) 827 if err != nil { 828 return nil, err 829 } 830 for _, row := range qr.Rows { 831 extColInfo := &extColInfo{ 832 columnType: row[1].ToString(), 833 } 834 extColInfos[row[0].ToString()] = extColInfo 835 } 836 return extColInfos, nil 837 } 838 839 func (vs *vstreamer) processJournalEvent(vevents []*binlogdatapb.VEvent, plan *streamerPlan, rows mysql.Rows) ([]*binlogdatapb.VEvent, error) { 840 // Get DbName 841 params, err := vs.cp.MysqlParams() 842 if err != nil { 843 return nil, err 844 } 845 nextrow: 846 for _, row := range rows.Rows { 847 afterOK, afterValues, err := vs.extractRowAndFilter(plan, row.Data, rows.DataColumns, row.NullColumns) 848 if err != nil { 849 return nil, err 850 } 851 if !afterOK { 852 // This can happen if someone manually deleted rows. 853 continue 854 } 855 // Exclude events that don't match the db_name. 856 for i, fld := range plan.fields() { 857 if fld.Name == "db_name" && afterValues[i].ToString() != params.DbName { 858 continue nextrow 859 } 860 } 861 for i, fld := range plan.fields() { 862 if fld.Name != "val" { 863 continue 864 } 865 journal := &binlogdatapb.Journal{} 866 avBytes, err := afterValues[i].ToBytes() 867 if err != nil { 868 return nil, err 869 } 870 if err := prototext.Unmarshal(avBytes, journal); err != nil { 871 return nil, err 872 } 873 vevents = append(vevents, &binlogdatapb.VEvent{ 874 Type: binlogdatapb.VEventType_JOURNAL, 875 Journal: journal, 876 }) 877 } 878 } 879 return vevents, nil 880 } 881 882 func (vs *vstreamer) processRowEvent(vevents []*binlogdatapb.VEvent, plan *streamerPlan, rows mysql.Rows) ([]*binlogdatapb.VEvent, error) { 883 rowChanges := make([]*binlogdatapb.RowChange, 0, len(rows.Rows)) 884 for _, row := range rows.Rows { 885 beforeOK, beforeValues, err := vs.extractRowAndFilter(plan, row.Identify, rows.IdentifyColumns, row.NullIdentifyColumns) 886 if err != nil { 887 return nil, err 888 } 889 afterOK, afterValues, err := vs.extractRowAndFilter(plan, row.Data, rows.DataColumns, row.NullColumns) 890 if err != nil { 891 return nil, err 892 } 893 if !beforeOK && !afterOK { 894 continue 895 } 896 rowChange := &binlogdatapb.RowChange{} 897 if beforeOK { 898 rowChange.Before = sqltypes.RowToProto3(beforeValues) 899 } 900 if afterOK { 901 rowChange.After = sqltypes.RowToProto3(afterValues) 902 } 903 rowChanges = append(rowChanges, rowChange) 904 } 905 if len(rowChanges) != 0 { 906 vevents = append(vevents, &binlogdatapb.VEvent{ 907 Type: binlogdatapb.VEventType_ROW, 908 RowEvent: &binlogdatapb.RowEvent{ 909 TableName: plan.Table.Name, 910 RowChanges: rowChanges, 911 Keyspace: vs.vse.keyspace, 912 Shard: vs.vse.shard, 913 }, 914 }) 915 } 916 return vevents, nil 917 } 918 919 func (vs *vstreamer) rebuildPlans() error { 920 for id, plan := range vs.plans { 921 if plan == nil { 922 // If a table has no plan, a vschema change will not 923 // cause that to change. 924 continue 925 } 926 newPlan, err := buildPlan(plan.Table, vs.vschema, vs.filter) 927 if err != nil { 928 return err 929 } 930 if newPlan == nil { 931 continue 932 } 933 vs.plans[id] = &streamerPlan{ 934 Plan: newPlan, 935 TableMap: plan.TableMap, 936 } 937 } 938 return nil 939 } 940 941 func (vs *vstreamer) extractRowAndFilter(plan *streamerPlan, data []byte, dataColumns, nullColumns mysql.Bitmap) (bool, []sqltypes.Value, error) { 942 if len(data) == 0 { 943 return false, nil, nil 944 } 945 values := make([]sqltypes.Value, dataColumns.Count()) 946 charsets := make([]collations.ID, len(values)) 947 valueIndex := 0 948 pos := 0 949 for colNum := 0; colNum < dataColumns.Count(); colNum++ { 950 if !dataColumns.Bit(colNum) { 951 return false, nil, fmt.Errorf("partial row image encountered: ensure binlog_row_image is set to 'full'") 952 } 953 if nullColumns.Bit(valueIndex) { 954 valueIndex++ 955 continue 956 } 957 value, l, err := mysql.CellValue(data, pos, plan.TableMap.Types[colNum], plan.TableMap.Metadata[colNum], plan.Table.Fields[colNum]) 958 if err != nil { 959 log.Errorf("extractRowAndFilter: %s, table: %s, colNum: %d, fields: %+v, current values: %+v", 960 err, plan.Table.Name, colNum, plan.Table.Fields, values) 961 return false, nil, err 962 } 963 pos += l 964 965 charsets[colNum] = collations.ID(plan.Table.Fields[colNum].Charset) 966 values[colNum] = value 967 valueIndex++ 968 } 969 filtered := make([]sqltypes.Value, len(plan.ColExprs)) 970 ok, err := plan.filter(values, filtered, charsets) 971 return ok, filtered, err 972 } 973 974 func wrapError(err error, stopPos mysql.Position, vse *Engine) error { 975 if err != nil { 976 vse.vstreamersEndedWithErrors.Add(1) 977 vse.errorCounts.Add("StreamEnded", 1) 978 err = fmt.Errorf("stream (at source tablet) error @ %v: %v", stopPos, err) 979 log.Error(err) 980 return err 981 } 982 log.Infof("stream (at source tablet) ended @ %v", stopPos) 983 return nil 984 }