vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/vstreamer/uvstreamer.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vstreamer 18 19 import ( 20 "context" 21 "fmt" 22 "io" 23 "regexp" 24 "sort" 25 "strings" 26 "sync" 27 "time" 28 29 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 30 31 "vitess.io/vitess/go/mysql" 32 "vitess.io/vitess/go/vt/dbconfigs" 33 "vitess.io/vitess/go/vt/key" 34 "vitess.io/vitess/go/vt/log" 35 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 36 querypb "vitess.io/vitess/go/vt/proto/query" 37 "vitess.io/vitess/go/vt/sqlparser" 38 "vitess.io/vitess/go/vt/vterrors" 39 "vitess.io/vitess/go/vt/vttablet/tabletserver/schema" 40 ) 41 42 var uvstreamerTestMode = false // Only used for testing 43 44 type tablePlan struct { 45 tablePK *binlogdatapb.TableLastPK 46 rule *binlogdatapb.Rule 47 } 48 49 type uvstreamer struct { 50 ctx context.Context 51 cancel func() 52 53 // input parameters 54 vse *Engine 55 send func([]*binlogdatapb.VEvent) error 56 cp dbconfigs.Connector 57 se *schema.Engine 58 startPos string 59 filter *binlogdatapb.Filter 60 inTablePKs []*binlogdatapb.TableLastPK 61 62 vschema *localVSchema 63 64 // map holds tables remaining to be fully copied, it is depleted as each table gets completely copied 65 plans map[string]*tablePlan 66 tablesToCopy []string 67 68 // changes for each table being copied 69 fields []*querypb.Field 70 pkfields []*querypb.Field 71 72 // current position in the binlog for this streamer 73 pos mysql.Position 74 75 // fast forward uses this to stop replicating upto the point of the last snapshot 76 stopPos mysql.Position 77 78 // lastTimestampNs is the last timestamp seen so far. 79 lastTimestampNs int64 80 ReplicationLagSeconds int64 81 mu sync.Mutex 82 83 config *uvstreamerConfig 84 85 vs *vstreamer //last vstreamer created in uvstreamer 86 } 87 88 type uvstreamerConfig struct { 89 MaxReplicationLag time.Duration 90 CatchupRetryTime time.Duration 91 } 92 93 func newUVStreamer(ctx context.Context, vse *Engine, cp dbconfigs.Connector, se *schema.Engine, startPos string, tablePKs []*binlogdatapb.TableLastPK, filter *binlogdatapb.Filter, vschema *localVSchema, send func([]*binlogdatapb.VEvent) error) *uvstreamer { 94 ctx, cancel := context.WithCancel(ctx) 95 config := &uvstreamerConfig{ 96 MaxReplicationLag: 1 * time.Nanosecond, 97 CatchupRetryTime: 1 * time.Second, 98 } 99 send2 := func(evs []*binlogdatapb.VEvent) error { 100 vse.vstreamerEventsStreamed.Add(int64(len(evs))) 101 for _, ev := range evs { 102 ev.Keyspace = vse.keyspace 103 ev.Shard = vse.shard 104 } 105 return send(evs) 106 } 107 uvs := &uvstreamer{ 108 ctx: ctx, 109 cancel: cancel, 110 vse: vse, 111 send: send2, 112 cp: cp, 113 se: se, 114 startPos: startPos, 115 filter: filter, 116 vschema: vschema, 117 config: config, 118 inTablePKs: tablePKs, 119 } 120 121 return uvs 122 } 123 124 // buildTablePlan identifies the tables for the copy phase and creates the plans which consist of the lastPK seen 125 // for a table and its Rule (for filtering purposes by the vstreamer engine) 126 // it can be called 127 // 128 // the first time, with just the filter and an empty pos 129 // during a restart, with both the filter and list of TableLastPK from the vgtid 130 func (uvs *uvstreamer) buildTablePlan() error { 131 uvs.plans = make(map[string]*tablePlan) 132 tableLastPKs := make(map[string]*binlogdatapb.TableLastPK) 133 for _, tablePK := range uvs.inTablePKs { 134 tableLastPKs[tablePK.TableName] = tablePK 135 } 136 tables := uvs.se.GetSchema() 137 for range tables { 138 for _, rule := range uvs.filter.Rules { 139 if !strings.HasPrefix(rule.Match, "/") { 140 _, ok := tables[rule.Match] 141 if !ok { 142 return fmt.Errorf("table %s is not present in the database", rule.Match) 143 } 144 } 145 } 146 } 147 for tableName := range tables { 148 rule, err := matchTable(tableName, uvs.filter, tables) 149 if err != nil { 150 return err 151 } 152 if rule == nil { 153 continue 154 } 155 plan := &tablePlan{ 156 tablePK: nil, 157 rule: &binlogdatapb.Rule{ 158 Filter: rule.Filter, 159 Match: rule.Match, 160 }, 161 } 162 tablePK, ok := tableLastPKs[tableName] 163 if !ok { 164 tablePK = &binlogdatapb.TableLastPK{ 165 TableName: tableName, 166 Lastpk: nil, 167 } 168 } 169 plan.tablePK = tablePK 170 uvs.plans[tableName] = plan 171 uvs.tablesToCopy = append(uvs.tablesToCopy, tableName) 172 173 } 174 sort.Strings(uvs.tablesToCopy) 175 return nil 176 } 177 178 // check which rule matches table, validate table is in schema 179 func matchTable(tableName string, filter *binlogdatapb.Filter, tables map[string]*schema.Table) (*binlogdatapb.Rule, error) { 180 if tableName == "dual" { 181 return nil, nil 182 } 183 found := false 184 for _, rule := range filter.Rules { 185 186 switch { 187 case tableName == rule.Match: 188 found = true 189 case strings.HasPrefix(rule.Match, "/"): 190 expr := strings.Trim(rule.Match, "/") 191 result, err := regexp.MatchString(expr, tableName) 192 if err != nil { 193 return nil, err 194 } 195 if !result { 196 continue 197 } 198 found = true 199 } 200 if found { 201 return &binlogdatapb.Rule{ 202 Match: tableName, 203 Filter: getQuery(tableName, rule.Filter), 204 }, nil 205 } 206 } 207 208 return nil, nil 209 } 210 211 // generate equivalent select statement if filter is empty or a keyrange. 212 func getQuery(tableName string, filter string) string { 213 query := filter 214 switch { 215 case filter == "": 216 buf := sqlparser.NewTrackedBuffer(nil) 217 buf.Myprintf("select * from %v", sqlparser.NewIdentifierCS(tableName)) 218 query = buf.String() 219 case key.IsKeyRange(filter): 220 buf := sqlparser.NewTrackedBuffer(nil) 221 buf.Myprintf("select * from %v where in_keyrange(%v)", 222 sqlparser.NewIdentifierCS(tableName), sqlparser.NewStrLiteral(filter)) 223 query = buf.String() 224 } 225 return query 226 } 227 228 func (uvs *uvstreamer) Cancel() { 229 log.Infof("uvstreamer context is being cancelled") 230 uvs.cancel() 231 } 232 233 // We have not yet implemented the logic to check if an event is for a row that is already copied, 234 // so we always return true so that we send all events for this table and so we don't miss events. 235 func (uvs *uvstreamer) isRowCopied(tableName string, ev *binlogdatapb.VEvent) bool { 236 return true 237 } 238 239 // Only send catchup/fastforward events for tables whose copy phase is complete or in progress. 240 // This ensures we fulfill the at-least-once delivery semantics for events. 241 // TODO: filter out events for rows not yet copied. Note that we can only do this as a best-effort 242 // for comparable PKs. 243 func (uvs *uvstreamer) shouldSendEventForTable(tableName string, ev *binlogdatapb.VEvent) bool { 244 table, ok := uvs.plans[tableName] 245 // Event is for a table which is not in its copy phase. 246 if !ok { 247 return true 248 } 249 250 // if table copy was not started and no tablePK was specified we can ignore catchup/fastforward events for it 251 if table.tablePK == nil || table.tablePK.Lastpk == nil { 252 return false 253 } 254 255 // Table is currently in its copy phase. We have not yet implemented the logic to 256 // check if an event is for a row that is already copied, so we always return true 257 // there so that we don't miss events. 258 // We may send duplicate insert events or update/delete events for rows not yet seen 259 // to the client for the table being copied. This is ok as the client is expected to be 260 // idempotent: we only promise at-least-once semantics for VStream API (not exactly-once). 261 // Aside: vreplication workflows handle at-least-once by adding where clauses that render 262 // DML queries, related to events for rows not yet copied, as no-ops. 263 return uvs.isRowCopied(tableName, ev) 264 } 265 266 // Do not send internal heartbeat events. Filter out events for tables whose copy has not been started. 267 func (uvs *uvstreamer) filterEvents(evs []*binlogdatapb.VEvent) []*binlogdatapb.VEvent { 268 if len(uvs.plans) == 0 { 269 return evs 270 } 271 var evs2 []*binlogdatapb.VEvent 272 var tableName string 273 var shouldSend bool 274 275 for _, ev := range evs { 276 switch ev.Type { 277 case binlogdatapb.VEventType_ROW: 278 tableName = ev.RowEvent.TableName 279 case binlogdatapb.VEventType_FIELD: 280 tableName = ev.FieldEvent.TableName 281 default: 282 tableName = "" 283 } 284 switch ev.Type { 285 case binlogdatapb.VEventType_HEARTBEAT: 286 shouldSend = false 287 default: 288 shouldSend = uvs.shouldSendEventForTable(tableName, ev) 289 } 290 291 if shouldSend { 292 evs2 = append(evs2, ev) 293 } 294 } 295 return evs2 296 } 297 298 // wraps the send parameter and filters events. called by fastforward/catchup 299 func (uvs *uvstreamer) send2(evs []*binlogdatapb.VEvent) error { 300 if len(evs) == 0 { 301 return nil 302 } 303 ev := evs[len(evs)-1] 304 if ev.Timestamp != 0 { 305 uvs.lastTimestampNs = ev.Timestamp * 1e9 306 } 307 behind := time.Now().UnixNano() - uvs.lastTimestampNs 308 uvs.setReplicationLagSeconds(behind / 1e9) 309 //log.Infof("sbm set to %d", uvs.ReplicationLagSeconds) 310 var evs2 []*binlogdatapb.VEvent 311 if len(uvs.plans) > 0 { 312 evs2 = uvs.filterEvents(evs) 313 } 314 err := uvs.send(evs2) 315 if err != nil && err != io.EOF { 316 return err 317 } 318 for _, ev := range evs2 { 319 if ev.Type == binlogdatapb.VEventType_GTID { 320 uvs.pos, _ = mysql.DecodePosition(ev.Gtid) 321 if !uvs.stopPos.IsZero() && uvs.pos.AtLeast(uvs.stopPos) { 322 err = io.EOF 323 } 324 } 325 } 326 if err != nil { 327 uvs.vse.errorCounts.Add("Send", 1) 328 } 329 return err 330 } 331 332 func (uvs *uvstreamer) sendEventsForCurrentPos() error { 333 log.Infof("sendEventsForCurrentPos") 334 evs := []*binlogdatapb.VEvent{{ 335 Type: binlogdatapb.VEventType_GTID, 336 Gtid: mysql.EncodePosition(uvs.pos), 337 }, { 338 Type: binlogdatapb.VEventType_OTHER, 339 }} 340 if err := uvs.send(evs); err != nil { 341 return wrapError(err, uvs.pos, uvs.vse) 342 } 343 return nil 344 } 345 346 func (uvs *uvstreamer) setStreamStartPosition() error { 347 curPos, err := uvs.currentPosition() 348 if err != nil { 349 return vterrors.Wrap(err, "could not obtain current position") 350 } 351 if uvs.startPos == "current" { 352 uvs.pos = curPos 353 if err := uvs.sendEventsForCurrentPos(); err != nil { 354 return err 355 } 356 return nil 357 } 358 pos, err := mysql.DecodePosition(uvs.startPos) 359 if err != nil { 360 return vterrors.Wrap(err, "could not decode position") 361 } 362 if !curPos.AtLeast(pos) { 363 uvs.vse.errorCounts.Add("GTIDSet Mismatch", 1) 364 return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, 365 "GTIDSet Mismatch: requested source position:%v, current target vrep position: %v", 366 mysql.EncodePosition(pos), mysql.EncodePosition(curPos)) 367 } 368 uvs.pos = pos 369 return nil 370 } 371 372 func (uvs *uvstreamer) currentPosition() (mysql.Position, error) { 373 conn, err := uvs.cp.Connect(uvs.ctx) 374 if err != nil { 375 return mysql.Position{}, err 376 } 377 defer conn.Close() 378 return conn.PrimaryPosition() 379 } 380 381 // Possible states: 382 // 1. TablePKs nil, startPos set to gtid or "current" => start replicating from pos 383 // 2. TablePKs nil, startPos empty => full table copy of tables matching filter 384 // 3. TablePKs not nil, startPos empty => table copy (for pks > lastPK) 385 // 4. TablePKs not nil, startPos set => run catchup from startPos, then table copy (for pks > lastPK) 386 func (uvs *uvstreamer) init() error { 387 if uvs.startPos == "" /* full copy */ || len(uvs.inTablePKs) > 0 /* resume copy */ { 388 if err := uvs.buildTablePlan(); err != nil { 389 return err 390 } 391 } 392 if uvs.startPos != "" { 393 if err := uvs.setStreamStartPosition(); err != nil { 394 return err 395 } 396 } 397 if uvs.pos.IsZero() && (len(uvs.plans) == 0) { 398 return fmt.Errorf("stream needs a position or a table to copy") 399 } 400 return nil 401 } 402 403 // Stream streams binlog events. 404 func (uvs *uvstreamer) Stream() error { 405 log.Info("Stream() called") 406 if err := uvs.init(); err != nil { 407 return err 408 } 409 if len(uvs.plans) > 0 { 410 log.Info("TablePKs is not nil: starting vs.copy()") 411 if err := uvs.copy(uvs.ctx); err != nil { 412 log.Infof("uvstreamer.Stream() copy returned with err %s", err) 413 uvs.vse.errorCounts.Add("Copy", 1) 414 return err 415 } 416 if err := uvs.allCopyComplete(); err != nil { 417 return err 418 } 419 } 420 vs := newVStreamer(uvs.ctx, uvs.cp, uvs.se, mysql.EncodePosition(uvs.pos), mysql.EncodePosition(uvs.stopPos), 421 uvs.filter, uvs.getVSchema(), uvs.send, "replicate", uvs.vse) 422 423 uvs.setVs(vs) 424 return vs.Stream() 425 } 426 427 func (uvs *uvstreamer) lock(msg string) { 428 uvs.mu.Lock() 429 } 430 431 func (uvs *uvstreamer) unlock(msg string) { 432 uvs.mu.Unlock() 433 } 434 435 func (uvs *uvstreamer) setVs(vs *vstreamer) { 436 uvs.lock("setVs") 437 defer uvs.unlock("setVs") 438 uvs.vs = vs 439 } 440 441 // SetVSchema updates the vstreamer against the new vschema. 442 func (uvs *uvstreamer) SetVSchema(vschema *localVSchema) { 443 uvs.lock("SetVSchema") 444 defer uvs.unlock("SetVSchema") 445 uvs.vschema = vschema 446 if uvs.vs != nil { 447 uvs.vs.SetVSchema(vschema) 448 } 449 } 450 451 func (uvs *uvstreamer) getVSchema() *localVSchema { 452 uvs.lock("getVSchema") 453 defer uvs.unlock("getVSchema") 454 return uvs.vschema 455 } 456 457 func (uvs *uvstreamer) setCopyState(tableName string, qr *querypb.QueryResult) { 458 uvs.plans[tableName].tablePK.Lastpk = qr 459 } 460 461 func (uvs *uvstreamer) allCopyComplete() error { 462 ev := &binlogdatapb.VEvent{ 463 Type: binlogdatapb.VEventType_COPY_COMPLETED, 464 } 465 466 if err := uvs.send([]*binlogdatapb.VEvent{ev}); err != nil { 467 return err 468 } 469 return nil 470 } 471 472 // dummy event sent only in test mode 473 func (uvs *uvstreamer) sendTestEvent(msg string) { 474 if !uvstreamerTestMode { 475 return 476 } 477 ev := &binlogdatapb.VEvent{ 478 Type: binlogdatapb.VEventType_OTHER, 479 Gtid: msg, 480 } 481 482 if err := uvs.send([]*binlogdatapb.VEvent{ev}); err != nil { 483 return 484 } 485 } 486 487 func (uvs *uvstreamer) copyComplete(tableName string) error { 488 evs := []*binlogdatapb.VEvent{ 489 {Type: binlogdatapb.VEventType_BEGIN}, 490 { 491 Type: binlogdatapb.VEventType_LASTPK, 492 LastPKEvent: &binlogdatapb.LastPKEvent{ 493 TableLastPK: &binlogdatapb.TableLastPK{ 494 TableName: tableName, 495 Lastpk: nil, 496 }, 497 Completed: true, 498 }, 499 }, 500 {Type: binlogdatapb.VEventType_COMMIT}, 501 } 502 if err := uvs.send(evs); err != nil { 503 return err 504 } 505 506 delete(uvs.plans, tableName) 507 uvs.tablesToCopy = uvs.tablesToCopy[1:] 508 return nil 509 } 510 511 func (uvs *uvstreamer) setPosition(gtid string, isInTx bool) error { 512 if gtid == "" { 513 return fmt.Errorf("empty gtid passed to setPosition") 514 } 515 pos, err := mysql.DecodePosition(gtid) 516 if err != nil { 517 return err 518 } 519 if pos.Equal(uvs.pos) { 520 return nil 521 } 522 gtidEvent := &binlogdatapb.VEvent{ 523 Type: binlogdatapb.VEventType_GTID, 524 Gtid: gtid, 525 Keyspace: uvs.vse.keyspace, 526 Shard: uvs.vse.shard, 527 } 528 529 var evs []*binlogdatapb.VEvent 530 if !isInTx { 531 evs = append(evs, &binlogdatapb.VEvent{ 532 Type: binlogdatapb.VEventType_BEGIN, 533 Keyspace: uvs.vse.keyspace, 534 Shard: uvs.vse.shard, 535 }) 536 } 537 evs = append(evs, gtidEvent) 538 if !isInTx { 539 evs = append(evs, &binlogdatapb.VEvent{ 540 Type: binlogdatapb.VEventType_COMMIT, 541 Keyspace: uvs.vse.keyspace, 542 Shard: uvs.vse.shard, 543 }) 544 } 545 if err := uvs.send(evs); err != nil { 546 return err 547 } 548 uvs.pos = pos 549 return nil 550 } 551 552 func (uvs *uvstreamer) getReplicationLagSeconds() int64 { 553 uvs.mu.Lock() 554 defer uvs.mu.Unlock() 555 return uvs.ReplicationLagSeconds 556 } 557 558 func (uvs *uvstreamer) setReplicationLagSeconds(sbm int64) { 559 uvs.mu.Lock() 560 defer uvs.mu.Unlock() 561 uvs.ReplicationLagSeconds = sbm 562 }