vitess.io/vitess@v0.16.2/go/vt/binlog/binlog_streamer.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package binlog 18 19 import ( 20 "bytes" 21 "fmt" 22 "io" 23 "strings" 24 25 "google.golang.org/protobuf/proto" 26 27 "context" 28 29 "vitess.io/vitess/go/mysql" 30 "vitess.io/vitess/go/sqltypes" 31 "vitess.io/vitess/go/stats" 32 "vitess.io/vitess/go/vt/dbconfigs" 33 "vitess.io/vitess/go/vt/log" 34 "vitess.io/vitess/go/vt/sqlparser" 35 "vitess.io/vitess/go/vt/vttablet/tabletserver/schema" 36 37 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 38 querypb "vitess.io/vitess/go/vt/proto/query" 39 ) 40 41 var ( 42 binlogStreamerErrors = stats.NewCountersWithSingleLabel("BinlogStreamerErrors", "error count when streaming binlog", "state") 43 44 // ErrClientEOF is returned by Streamer if the stream ended because the 45 // consumer of the stream indicated it doesn't want any more events. 46 ErrClientEOF = fmt.Errorf("binlog stream consumer ended the reply stream") 47 // ErrServerEOF is returned by Streamer if the stream ended because the 48 // connection to the mysqld server was lost, or the stream was terminated by 49 // mysqld. 50 ErrServerEOF = fmt.Errorf("binlog stream connection was closed by mysqld") 51 52 // statementPrefixes are normal sql statement prefixes. 53 statementPrefixes = map[string]binlogdatapb.BinlogTransaction_Statement_Category{ 54 "begin": binlogdatapb.BinlogTransaction_Statement_BL_BEGIN, 55 "commit": binlogdatapb.BinlogTransaction_Statement_BL_COMMIT, 56 "rollback": binlogdatapb.BinlogTransaction_Statement_BL_ROLLBACK, 57 "insert": binlogdatapb.BinlogTransaction_Statement_BL_INSERT, 58 "update": binlogdatapb.BinlogTransaction_Statement_BL_UPDATE, 59 "delete": binlogdatapb.BinlogTransaction_Statement_BL_DELETE, 60 "create": binlogdatapb.BinlogTransaction_Statement_BL_DDL, 61 "alter": binlogdatapb.BinlogTransaction_Statement_BL_DDL, 62 "drop": binlogdatapb.BinlogTransaction_Statement_BL_DDL, 63 "truncate": binlogdatapb.BinlogTransaction_Statement_BL_DDL, 64 "rename": binlogdatapb.BinlogTransaction_Statement_BL_DDL, 65 "set": binlogdatapb.BinlogTransaction_Statement_BL_SET, 66 } 67 ) 68 69 // FullBinlogStatement has all the information we can gather for an event. 70 // Some fields are only set if asked for, and if RBR is used. 71 // Otherwise we'll revert back to using the SQL comments, for SBR. 72 type FullBinlogStatement struct { 73 Statement *binlogdatapb.BinlogTransaction_Statement 74 Table string 75 KeyspaceID []byte 76 PKNames []*querypb.Field 77 PKValues []sqltypes.Value 78 } 79 80 // sendTransactionFunc is used to send binlog events. 81 // reply is of type binlogdatapb.BinlogTransaction. 82 type sendTransactionFunc func(eventToken *querypb.EventToken, statements []FullBinlogStatement) error 83 84 // getStatementCategory returns the binlogdatapb.BL_* category for a SQL statement. 85 func getStatementCategory(sql string) binlogdatapb.BinlogTransaction_Statement_Category { 86 if i := strings.IndexByte(sql, byte(' ')); i >= 0 { 87 sql = sql[:i] 88 } 89 return statementPrefixes[strings.ToLower(sql)] 90 } 91 92 // tableCacheEntry contains everything we know about a table. 93 // It is created when we get a TableMap event. 94 type tableCacheEntry struct { 95 // tm is what we get from a TableMap event. 96 tm *mysql.TableMap 97 98 // ti is the table descriptor we get from the schema engine. 99 ti *schema.Table 100 101 // The following fields are used if we want to extract the 102 // keyspace_id of a row. 103 104 // resolver is only set if Streamer.resolverFactory is set. 105 resolver keyspaceIDResolver 106 107 // keyspaceIDIndex is the index of the field that can be used 108 // to compute the keyspaceID. Set to -1 if no resolver is in used. 109 keyspaceIDIndex int 110 111 // The following fields are used if we want to extract the 112 // primary key of a row. 113 114 // pkNames contains an array of fields for the PK. 115 pkNames []*querypb.Field 116 117 // pkIndexes contains the index of a given column in the 118 // PK. It is -1 f the column is not in any PK. It contains as 119 // many fields as there are columns in the table. 120 // For instance, in a table defined like this: 121 // field1 varchar() 122 // pkpart2 int 123 // pkpart1 int 124 // pkIndexes would contain: [ 125 // -1 // field1 is not in the pk 126 // 1 // pkpart2 is the second part of the PK 127 // 0 // pkpart1 is the first part of the PK 128 // This array is built this way so when we extract the columns 129 // in a row, we can just save them in the PK array easily. 130 pkIndexes []int 131 } 132 133 // Streamer streams binlog events from MySQL. 134 // A Streamer should only be used once. To start another stream, call 135 // NewStreamer() again. 136 type Streamer struct { 137 // The following fields at set at creation and immutable. 138 cp dbconfigs.Connector 139 se *schema.Engine 140 resolverFactory keyspaceIDResolverFactory 141 extractPK bool 142 143 clientCharset *binlogdatapb.Charset 144 startPos mysql.Position 145 timestamp int64 146 sendTransaction sendTransactionFunc 147 usePreviousGTIDs bool 148 149 conn *BinlogConnection 150 } 151 152 // NewStreamer creates a binlog Streamer. 153 // 154 // dbname specifes the database to stream events for. 155 // mysqld is the local instance of mysqlctl.Mysqld. 156 // charset is the default character set on the BinlogPlayer side. 157 // startPos is the position to start streaming at. Incompatible with timestamp. 158 // timestamp is the timestamp to start streaming at. Incompatible with startPos. 159 // sendTransaction is called each time a transaction is committed or rolled back. 160 func NewStreamer(cp dbconfigs.Connector, se *schema.Engine, clientCharset *binlogdatapb.Charset, startPos mysql.Position, timestamp int64, sendTransaction sendTransactionFunc) *Streamer { 161 return &Streamer{ 162 cp: cp, 163 se: se, 164 clientCharset: clientCharset, 165 startPos: startPos, 166 timestamp: timestamp, 167 sendTransaction: sendTransaction, 168 } 169 } 170 171 // Stream starts streaming binlog events using the settings from NewStreamer(). 172 func (bls *Streamer) Stream(ctx context.Context) (err error) { 173 // Ensure se is Open. If vttablet came up in a non_serving role, 174 // the schema engine may not have been initialized. 175 if err := bls.se.Open(); err != nil { 176 return err 177 } 178 stopPos := bls.startPos 179 defer func() { 180 if err != nil && err != ErrBinlogUnavailable { 181 err = fmt.Errorf("stream error @ %v: %v", stopPos, err) 182 } 183 log.Infof("stream ended @ %v, err = %v", stopPos, err) 184 }() 185 186 if bls.conn, err = NewBinlogConnection(bls.cp); err != nil { 187 return err 188 } 189 defer bls.conn.Close() 190 191 // Check that the default charsets match, if the client specified one. 192 // Note that Streamer uses the settings for the 'dba' user, while 193 // BinlogPlayer uses the 'filtered' user, so those are the ones whose charset 194 // must match. Filtered replication should still succeed even with a default 195 // mismatch, since we pass per-statement charset info. However, Vitess in 196 // general doesn't support servers with different default charsets, so we 197 // treat it as a configuration error. 198 if bls.clientCharset != nil { 199 cs, err := mysql.GetCharset(bls.conn.Conn) 200 if err != nil { 201 return fmt.Errorf("can't get charset to check binlog stream: %v", err) 202 } 203 log.Infof("binlog stream client charset = %v, server charset = %v", bls.clientCharset, cs) 204 if !proto.Equal(cs, bls.clientCharset) { 205 return fmt.Errorf("binlog stream client charset (%v) doesn't match server (%v)", bls.clientCharset, cs) 206 } 207 } 208 209 var events <-chan mysql.BinlogEvent 210 var errs <-chan error 211 if bls.timestamp != 0 { 212 // MySQL 5.6 only: We are going to start reading the 213 // logs from the beginning of a binlog file. That is 214 // going to send us the PREVIOUS_GTIDS_EVENT that 215 // contains the starting GTIDSet, and we will save 216 // that as the current position. 217 bls.usePreviousGTIDs = true 218 events, errs, err = bls.conn.StartBinlogDumpFromBinlogBeforeTimestamp(ctx, bls.timestamp) 219 } else if !bls.startPos.IsZero() { 220 // MySQL 5.6 only: we are starting from a random 221 // binlog position. It turns out we will receive a 222 // PREVIOUS_GTIDS_EVENT event, that has a GTIDSet 223 // extracted from the binlogs. It is not related to 224 // the starting position we pass in, it seems it is 225 // just the PREVIOUS_GTIDS_EVENT from the file we're reading. 226 // So we have to skip it. 227 events, errs, err = bls.conn.StartBinlogDumpFromPosition(ctx, "", bls.startPos) 228 } else { 229 bls.startPos, events, errs, err = bls.conn.StartBinlogDumpFromCurrent(ctx) 230 } 231 if err != nil { 232 return err 233 } 234 235 // parseEvents will loop until the events channel is closed, the 236 // service enters the SHUTTING_DOWN state, or an error occurs. 237 stopPos, err = bls.parseEvents(ctx, events, errs) 238 return err 239 } 240 241 // parseEvents processes the raw binlog dump stream from the server, one event 242 // at a time, and groups them into transactions. It is called from within the 243 // service function launched by Stream(). 244 // 245 // If the sendTransaction func returns io.EOF, parseEvents returns ErrClientEOF. 246 // If the events channel is closed, parseEvents returns ErrServerEOF. 247 // If the context is done, returns ctx.Err(). 248 func (bls *Streamer) parseEvents(ctx context.Context, events <-chan mysql.BinlogEvent, errs <-chan error) (mysql.Position, error) { 249 var statements []FullBinlogStatement 250 var format mysql.BinlogFormat 251 var gtid mysql.GTID 252 var pos = bls.startPos 253 var autocommit = true 254 var err error 255 256 // Remember the RBR state. 257 // tableMaps is indexed by tableID. 258 tableMaps := make(map[uint64]*tableCacheEntry) 259 260 // A begin can be triggered either by a BEGIN query, or by a GTID_EVENT. 261 begin := func() { 262 if statements != nil { 263 // If this happened, it would be a legitimate error. 264 log.Errorf("BEGIN in binlog stream while still in another transaction; dropping %d statements: %v", len(statements), statements) 265 binlogStreamerErrors.Add("ParseEvents", 1) 266 } 267 statements = make([]FullBinlogStatement, 0, 10) 268 autocommit = false 269 } 270 // A commit can be triggered either by a COMMIT query, or by an XID_EVENT. 271 // Statements that aren't wrapped in BEGIN/COMMIT are committed immediately. 272 commit := func(timestamp uint32) error { 273 if int64(timestamp) >= bls.timestamp { 274 eventToken := &querypb.EventToken{ 275 Timestamp: int64(timestamp), 276 Position: mysql.EncodePosition(pos), 277 } 278 if err = bls.sendTransaction(eventToken, statements); err != nil { 279 if err == io.EOF { 280 return ErrClientEOF 281 } 282 return fmt.Errorf("send reply error: %v", err) 283 } 284 } 285 statements = nil 286 autocommit = true 287 return nil 288 } 289 290 // Parse events. 291 for { 292 var ev mysql.BinlogEvent 293 var ok bool 294 295 select { 296 case ev, ok = <-events: 297 if !ok { 298 // events channel has been closed, which means the connection died. 299 log.Infof("reached end of binlog event stream") 300 return pos, ErrServerEOF 301 } 302 case err = <-errs: 303 return pos, err 304 case <-ctx.Done(): 305 log.Infof("stopping early due to binlog Streamer service shutdown or client disconnect") 306 return pos, ctx.Err() 307 } 308 309 // Validate the buffer before reading fields from it. 310 if !ev.IsValid() { 311 return pos, fmt.Errorf("can't parse binlog event, invalid data: %#v", ev) 312 } 313 314 // We need to keep checking for FORMAT_DESCRIPTION_EVENT even after we've 315 // seen one, because another one might come along (e.g. on log rotate due to 316 // binlog settings change) that changes the format. 317 if ev.IsFormatDescription() { 318 format, err = ev.Format() 319 if err != nil { 320 return pos, fmt.Errorf("can't parse FORMAT_DESCRIPTION_EVENT: %v, event data: %#v", err, ev) 321 } 322 continue 323 } 324 325 // We can't parse anything until we get a FORMAT_DESCRIPTION_EVENT that 326 // tells us the size of the event header. 327 if format.IsZero() { 328 // The only thing that should come before the FORMAT_DESCRIPTION_EVENT 329 // is a fake ROTATE_EVENT, which the primary sends to tell us the name 330 // of the current log file. 331 if ev.IsRotate() { 332 continue 333 } 334 return pos, fmt.Errorf("got a real event before FORMAT_DESCRIPTION_EVENT: %#v", ev) 335 } 336 337 // Strip the checksum, if any. We don't actually verify the checksum, so discard it. 338 ev, _, err = ev.StripChecksum(format) 339 if err != nil { 340 return pos, fmt.Errorf("can't strip checksum from binlog event: %v, event data: %#v", err, ev) 341 } 342 343 switch { 344 case ev.IsPseudo(): 345 gtid, _, err = ev.GTID(format) 346 if err != nil { 347 return pos, fmt.Errorf("can't get GTID from binlog event: %v, event data: %#v", err, ev) 348 } 349 oldpos := pos 350 pos = mysql.AppendGTID(pos, gtid) 351 // If the event is received outside of a transaction, it must 352 // be sent. Otherwise, it will get lost and the targets will go out 353 // of sync. 354 if autocommit && !pos.Equal(oldpos) { 355 if err = commit(ev.Timestamp()); err != nil { 356 return pos, err 357 } 358 } 359 case ev.IsGTID(): // GTID_EVENT: update current GTID, maybe BEGIN. 360 var hasBegin bool 361 gtid, hasBegin, err = ev.GTID(format) 362 if err != nil { 363 return pos, fmt.Errorf("can't get GTID from binlog event: %v, event data: %#v", err, ev) 364 } 365 pos = mysql.AppendGTID(pos, gtid) 366 if hasBegin { 367 begin() 368 } 369 case ev.IsXID(): // XID_EVENT (equivalent to COMMIT) 370 if err = commit(ev.Timestamp()); err != nil { 371 return pos, err 372 } 373 case ev.IsIntVar(): // INTVAR_EVENT 374 typ, value, err := ev.IntVar(format) 375 if err != nil { 376 return pos, fmt.Errorf("can't parse INTVAR_EVENT: %v, event data: %#v", err, ev) 377 } 378 statements = append(statements, FullBinlogStatement{ 379 Statement: &binlogdatapb.BinlogTransaction_Statement{ 380 Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, 381 Sql: []byte(fmt.Sprintf("SET %s=%d", mysql.IntVarNames[typ], value)), 382 }, 383 }) 384 case ev.IsRand(): // RAND_EVENT 385 seed1, seed2, err := ev.Rand(format) 386 if err != nil { 387 return pos, fmt.Errorf("can't parse RAND_EVENT: %v, event data: %#v", err, ev) 388 } 389 statements = append(statements, FullBinlogStatement{ 390 Statement: &binlogdatapb.BinlogTransaction_Statement{ 391 Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, 392 Sql: []byte(fmt.Sprintf("SET @@RAND_SEED1=%d, @@RAND_SEED2=%d", seed1, seed2)), 393 }, 394 }) 395 case ev.IsQuery(): // QUERY_EVENT 396 // Extract the query string and group into transactions. 397 q, err := ev.Query(format) 398 if err != nil { 399 return pos, fmt.Errorf("can't get query from binlog event: %v, event data: %#v", err, ev) 400 } 401 switch cat := getStatementCategory(q.SQL); cat { 402 case binlogdatapb.BinlogTransaction_Statement_BL_BEGIN: 403 begin() 404 case binlogdatapb.BinlogTransaction_Statement_BL_ROLLBACK: 405 // Rollbacks are possible under some circumstances. Since the stream 406 // client keeps track of its replication position by updating the set 407 // of GTIDs it's seen, we must commit an empty transaction so the client 408 // can update its position. 409 statements = nil 410 fallthrough 411 case binlogdatapb.BinlogTransaction_Statement_BL_COMMIT: 412 if err = commit(ev.Timestamp()); err != nil { 413 return pos, err 414 } 415 default: // BL_DDL, BL_SET, BL_INSERT, BL_UPDATE, BL_DELETE, BL_UNRECOGNIZED 416 if q.Database != "" && q.Database != bls.cp.DBName() { 417 // Skip cross-db statements. 418 continue 419 } 420 setTimestamp := &binlogdatapb.BinlogTransaction_Statement{ 421 Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, 422 Sql: []byte(fmt.Sprintf("SET TIMESTAMP=%d", ev.Timestamp())), 423 } 424 statement := &binlogdatapb.BinlogTransaction_Statement{ 425 Category: cat, 426 Sql: []byte(q.SQL), 427 } 428 // If the statement has a charset and it's different than our client's 429 // default charset, send it along with the statement. 430 // If our client hasn't told us its charset, always send it. 431 if bls.clientCharset == nil || (q.Charset != nil && !proto.Equal(q.Charset, bls.clientCharset)) { 432 setTimestamp.Charset = q.Charset 433 statement.Charset = q.Charset 434 } 435 statements = append(statements, FullBinlogStatement{ 436 Statement: setTimestamp, 437 }, FullBinlogStatement{ 438 Statement: statement, 439 }) 440 if autocommit { 441 if err = commit(ev.Timestamp()); err != nil { 442 return pos, err 443 } 444 } 445 } 446 case ev.IsPreviousGTIDs(): // PREVIOUS_GTIDS_EVENT 447 // MySQL 5.6 only: The Binlogs contain an 448 // event that gives us all the previously 449 // applied commits. It is *not* an 450 // authoritative value, unless we started from 451 // the beginning of a binlog file. 452 if !bls.usePreviousGTIDs { 453 continue 454 } 455 newPos, err := ev.PreviousGTIDs(format) 456 if err != nil { 457 return pos, err 458 } 459 pos = newPos 460 if err = commit(ev.Timestamp()); err != nil { 461 return pos, err 462 } 463 case ev.IsTableMap(): 464 // Save all tables, even not in the same DB. 465 tableID := ev.TableID(format) 466 tm, err := ev.TableMap(format) 467 if err != nil { 468 return pos, err 469 } 470 // TODO(alainjobart) if table is already in map, 471 // just use it. 472 473 tce := &tableCacheEntry{ 474 tm: tm, 475 keyspaceIDIndex: -1, 476 } 477 tableMaps[tableID] = tce 478 479 // Check we're in the right database, and if so, fill 480 // in more data. 481 if tm.Database != "" && tm.Database != bls.cp.DBName() { 482 continue 483 } 484 485 // Find and fill in the table schema. 486 tce.ti = bls.se.GetTable(sqlparser.NewIdentifierCS(tm.Name)) 487 if tce.ti == nil { 488 return pos, fmt.Errorf("unknown table %v in schema", tm.Name) 489 } 490 491 // Fill in the resolver if needed. 492 if bls.resolverFactory != nil { 493 tce.keyspaceIDIndex, tce.resolver, err = bls.resolverFactory(tce.ti) 494 if err != nil { 495 return pos, fmt.Errorf("cannot find column to use to find keyspace_id for table %v", tm.Name) 496 } 497 } 498 499 // Fill in PK indexes if necessary. 500 if bls.extractPK { 501 tce.pkNames = make([]*querypb.Field, len(tce.ti.PKColumns)) 502 tce.pkIndexes = make([]int, len(tce.ti.Fields)) 503 for i := range tce.pkIndexes { 504 // Put -1 as default in here. 505 tce.pkIndexes[i] = -1 506 } 507 for i, c := range tce.ti.PKColumns { 508 // Patch in every PK column index. 509 tce.pkIndexes[c] = i 510 // Fill in pknames 511 tce.pkNames[i] = tce.ti.Fields[c] 512 } 513 } 514 case ev.IsWriteRows(): 515 tableID := ev.TableID(format) 516 tce, ok := tableMaps[tableID] 517 if !ok { 518 return pos, fmt.Errorf("unknown tableID %v in WriteRows event", tableID) 519 } 520 if tce.ti == nil { 521 // Skip cross-db statements. 522 continue 523 } 524 setTimestamp := &binlogdatapb.BinlogTransaction_Statement{ 525 Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, 526 Sql: []byte(fmt.Sprintf("SET TIMESTAMP=%d", ev.Timestamp())), 527 } 528 statements = append(statements, FullBinlogStatement{ 529 Statement: setTimestamp, 530 }) 531 532 rows, err := ev.Rows(format, tce.tm) 533 if err != nil { 534 return pos, err 535 } 536 537 statements = bls.appendInserts(statements, tce, &rows) 538 539 if autocommit { 540 if err = commit(ev.Timestamp()); err != nil { 541 return pos, err 542 } 543 } 544 case ev.IsUpdateRows(): 545 tableID := ev.TableID(format) 546 tce, ok := tableMaps[tableID] 547 if !ok { 548 return pos, fmt.Errorf("unknown tableID %v in UpdateRows event", tableID) 549 } 550 if tce.ti == nil { 551 // Skip cross-db statements. 552 continue 553 } 554 setTimestamp := &binlogdatapb.BinlogTransaction_Statement{ 555 Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, 556 Sql: []byte(fmt.Sprintf("SET TIMESTAMP=%d", ev.Timestamp())), 557 } 558 statements = append(statements, FullBinlogStatement{ 559 Statement: setTimestamp, 560 }) 561 562 rows, err := ev.Rows(format, tce.tm) 563 if err != nil { 564 return pos, err 565 } 566 567 statements = bls.appendUpdates(statements, tce, &rows) 568 569 if autocommit { 570 if err = commit(ev.Timestamp()); err != nil { 571 return pos, err 572 } 573 } 574 case ev.IsDeleteRows(): 575 tableID := ev.TableID(format) 576 tce, ok := tableMaps[tableID] 577 if !ok { 578 return pos, fmt.Errorf("unknown tableID %v in DeleteRows event", tableID) 579 } 580 if tce.ti == nil { 581 // Skip cross-db statements. 582 continue 583 } 584 setTimestamp := &binlogdatapb.BinlogTransaction_Statement{ 585 Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, 586 Sql: []byte(fmt.Sprintf("SET TIMESTAMP=%d", ev.Timestamp())), 587 } 588 statements = append(statements, FullBinlogStatement{ 589 Statement: setTimestamp, 590 }) 591 592 rows, err := ev.Rows(format, tce.tm) 593 if err != nil { 594 return pos, err 595 } 596 597 statements = bls.appendDeletes(statements, tce, &rows) 598 599 if autocommit { 600 if err = commit(ev.Timestamp()); err != nil { 601 return pos, err 602 } 603 } 604 } 605 } 606 } 607 608 func (bls *Streamer) appendInserts(statements []FullBinlogStatement, tce *tableCacheEntry, rows *mysql.Rows) []FullBinlogStatement { 609 for i := range rows.Rows { 610 sql := sqlparser.NewTrackedBuffer(nil) 611 sql.Myprintf("INSERT INTO %v SET ", sqlparser.NewIdentifierCS(tce.tm.Name)) 612 613 keyspaceIDCell, pkValues, err := writeValuesAsSQL(sql, tce, rows, i, tce.pkNames != nil) 614 if err != nil { 615 log.Warningf("writeValuesAsSQL(%v) failed: %v", i, err) 616 continue 617 } 618 619 // Fill in keyspace id if needed. 620 var ksid []byte 621 if tce.resolver != nil { 622 var err error 623 ksid, err = tce.resolver.keyspaceID(keyspaceIDCell) 624 if err != nil { 625 log.Warningf("resolver(%v) failed: %v", err) 626 } 627 } 628 629 statement := &binlogdatapb.BinlogTransaction_Statement{ 630 Category: binlogdatapb.BinlogTransaction_Statement_BL_INSERT, 631 Sql: []byte(sql.String()), 632 } 633 statements = append(statements, FullBinlogStatement{ 634 Statement: statement, 635 Table: tce.tm.Name, 636 KeyspaceID: ksid, 637 PKNames: tce.pkNames, 638 PKValues: pkValues, 639 }) 640 } 641 return statements 642 } 643 644 func (bls *Streamer) appendUpdates(statements []FullBinlogStatement, tce *tableCacheEntry, rows *mysql.Rows) []FullBinlogStatement { 645 for i := range rows.Rows { 646 sql := sqlparser.NewTrackedBuffer(nil) 647 sql.Myprintf("UPDATE %v SET ", sqlparser.NewIdentifierCS(tce.tm.Name)) 648 649 keyspaceIDCell, pkValues, err := writeValuesAsSQL(sql, tce, rows, i, tce.pkNames != nil) 650 if err != nil { 651 log.Warningf("writeValuesAsSQL(%v) failed: %v", i, err) 652 continue 653 } 654 655 sql.WriteString(" WHERE ") 656 657 if _, _, err := writeIdentifiersAsSQL(sql, tce, rows, i, false); err != nil { 658 log.Warningf("writeIdentifiesAsSQL(%v) failed: %v", i, err) 659 continue 660 } 661 662 // Fill in keyspace id if needed. 663 var ksid []byte 664 if tce.resolver != nil { 665 var err error 666 ksid, err = tce.resolver.keyspaceID(keyspaceIDCell) 667 if err != nil { 668 log.Warningf("resolver(%v) failed: %v", err) 669 } 670 } 671 672 update := &binlogdatapb.BinlogTransaction_Statement{ 673 Category: binlogdatapb.BinlogTransaction_Statement_BL_UPDATE, 674 Sql: []byte(sql.String()), 675 } 676 statements = append(statements, FullBinlogStatement{ 677 Statement: update, 678 Table: tce.tm.Name, 679 KeyspaceID: ksid, 680 PKNames: tce.pkNames, 681 PKValues: pkValues, 682 }) 683 } 684 return statements 685 } 686 687 func (bls *Streamer) appendDeletes(statements []FullBinlogStatement, tce *tableCacheEntry, rows *mysql.Rows) []FullBinlogStatement { 688 for i := range rows.Rows { 689 sql := sqlparser.NewTrackedBuffer(nil) 690 sql.Myprintf("DELETE FROM %v WHERE ", sqlparser.NewIdentifierCS(tce.tm.Name)) 691 692 keyspaceIDCell, pkValues, err := writeIdentifiersAsSQL(sql, tce, rows, i, tce.pkNames != nil) 693 if err != nil { 694 log.Warningf("writeIdentifiesAsSQL(%v) failed: %v", i, err) 695 continue 696 } 697 698 // Fill in keyspace id if needed. 699 var ksid []byte 700 if tce.resolver != nil { 701 var err error 702 ksid, err = tce.resolver.keyspaceID(keyspaceIDCell) 703 if err != nil { 704 log.Warningf("resolver(%v) failed: %v", err) 705 } 706 } 707 708 statement := &binlogdatapb.BinlogTransaction_Statement{ 709 Category: binlogdatapb.BinlogTransaction_Statement_BL_DELETE, 710 Sql: []byte(sql.String()), 711 } 712 statements = append(statements, FullBinlogStatement{ 713 Statement: statement, 714 Table: tce.tm.Name, 715 KeyspaceID: ksid, 716 PKNames: tce.pkNames, 717 PKValues: pkValues, 718 }) 719 } 720 return statements 721 } 722 723 // writeValuesAsSQL is a helper method to print the values as SQL in the 724 // provided bytes.Buffer. It also returns the value for the keyspaceIDColumn, 725 // and the array of values for the PK, if necessary. 726 func writeValuesAsSQL(sql *sqlparser.TrackedBuffer, tce *tableCacheEntry, rs *mysql.Rows, rowIndex int, getPK bool) (sqltypes.Value, []sqltypes.Value, error) { 727 valueIndex := 0 728 data := rs.Rows[rowIndex].Data 729 pos := 0 730 var keyspaceIDCell sqltypes.Value 731 var pkValues []sqltypes.Value 732 if getPK { 733 pkValues = make([]sqltypes.Value, len(tce.pkNames)) 734 } 735 736 if len(tce.ti.Fields) != rs.DataColumns.Count() { 737 err := fmt.Errorf("[%v] cached columns count[%d] mismatch binglog row [%d]", tce.ti.Name, len(tce.ti.Fields), rs.DataColumns.Count()) 738 return sqltypes.Value{}, nil, err 739 } 740 741 for c := 0; c < rs.DataColumns.Count(); c++ { 742 if !rs.DataColumns.Bit(c) { 743 continue 744 } 745 746 // Print a separator if needed, then print the name. 747 if valueIndex > 0 { 748 sql.WriteString(", ") 749 } 750 sql.Myprintf("%v", sqlparser.NewIdentifierCI(tce.ti.Fields[c].Name)) 751 sql.WriteByte('=') 752 753 if rs.Rows[rowIndex].NullColumns.Bit(valueIndex) { 754 // This column is represented, but its value is NULL. 755 sql.WriteString("NULL") 756 valueIndex++ 757 continue 758 } 759 760 // We have real data. 761 value, l, err := mysql.CellValue(data, pos, tce.tm.Types[c], tce.tm.Metadata[c], &querypb.Field{Type: tce.ti.Fields[c].Type}) 762 if err != nil { 763 return keyspaceIDCell, nil, err 764 } 765 vBytes, err := value.ToBytes() 766 if err != nil { 767 return sqltypes.Value{}, nil, err 768 } 769 if value.Type() == querypb.Type_TIMESTAMP && !bytes.HasPrefix(vBytes, mysql.ZeroTimestamp) { 770 // Values in the binary log are UTC. Let's convert them 771 // to whatever timezone the connection is using, 772 // so MySQL properly converts them back to UTC. 773 sql.WriteString("convert_tz(") 774 value.EncodeSQL(sql) 775 sql.WriteString(", '+00:00', @@session.time_zone)") 776 } else { 777 value.EncodeSQL(sql) 778 } 779 if c == tce.keyspaceIDIndex { 780 keyspaceIDCell = value 781 } 782 if getPK { 783 if tce.pkIndexes[c] != -1 { 784 pkValues[tce.pkIndexes[c]] = value 785 } 786 } 787 pos += l 788 valueIndex++ 789 } 790 791 return keyspaceIDCell, pkValues, nil 792 } 793 794 // writeIdentifiersAsSQL is a helper method to print the identifies as SQL in the 795 // provided bytes.Buffer. It also returns the value for the keyspaceIDColumn, 796 // and the array of values for the PK, if necessary. 797 func writeIdentifiersAsSQL(sql *sqlparser.TrackedBuffer, tce *tableCacheEntry, rs *mysql.Rows, rowIndex int, getPK bool) (sqltypes.Value, []sqltypes.Value, error) { 798 valueIndex := 0 799 data := rs.Rows[rowIndex].Identify 800 pos := 0 801 var keyspaceIDCell sqltypes.Value 802 var pkValues []sqltypes.Value 803 if getPK { 804 pkValues = make([]sqltypes.Value, len(tce.pkNames)) 805 } 806 for c := 0; c < rs.IdentifyColumns.Count(); c++ { 807 if !rs.IdentifyColumns.Bit(c) { 808 continue 809 } 810 811 // Print a separator if needed, then print the name. 812 if valueIndex > 0 { 813 sql.WriteString(" AND ") 814 } 815 sql.Myprintf("%v", sqlparser.NewIdentifierCI(tce.ti.Fields[c].Name)) 816 817 if rs.Rows[rowIndex].NullIdentifyColumns.Bit(valueIndex) { 818 // This column is represented, but its value is NULL. 819 sql.WriteString(" IS NULL") 820 valueIndex++ 821 continue 822 } 823 sql.WriteByte('=') 824 825 // We have real data. 826 value, l, err := mysql.CellValue(data, pos, tce.tm.Types[c], tce.tm.Metadata[c], &querypb.Field{Type: tce.ti.Fields[c].Type}) 827 if err != nil { 828 return keyspaceIDCell, nil, err 829 } 830 vBytes, err := value.ToBytes() 831 if err != nil { 832 return keyspaceIDCell, nil, err 833 } 834 if value.Type() == querypb.Type_TIMESTAMP && !bytes.HasPrefix(vBytes, mysql.ZeroTimestamp) { 835 // Values in the binary log are UTC. Let's convert them 836 // to whatever timezone the connection is using, 837 // so MySQL properly converts them back to UTC. 838 sql.WriteString("convert_tz(") 839 value.EncodeSQL(sql) 840 sql.WriteString(", '+00:00', @@session.time_zone)") 841 } else { 842 value.EncodeSQL(sql) 843 } 844 if c == tce.keyspaceIDIndex { 845 keyspaceIDCell = value 846 } 847 if getPK { 848 if tce.pkIndexes[c] != -1 { 849 pkValues[tce.pkIndexes[c]] = value 850 } 851 } 852 pos += l 853 valueIndex++ 854 } 855 856 return keyspaceIDCell, pkValues, nil 857 }