vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/vstreamer/uvstreamer.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vstreamer
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"io"
    23  	"regexp"
    24  	"sort"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    30  
    31  	"vitess.io/vitess/go/mysql"
    32  	"vitess.io/vitess/go/vt/dbconfigs"
    33  	"vitess.io/vitess/go/vt/key"
    34  	"vitess.io/vitess/go/vt/log"
    35  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    36  	querypb "vitess.io/vitess/go/vt/proto/query"
    37  	"vitess.io/vitess/go/vt/sqlparser"
    38  	"vitess.io/vitess/go/vt/vterrors"
    39  	"vitess.io/vitess/go/vt/vttablet/tabletserver/schema"
    40  )
    41  
    42  var uvstreamerTestMode = false // Only used for testing
    43  
// tablePlan is the per-table state kept for a table that still has to be copied.
//
// tablePK carries the table name together with the last primary key recorded
// for it (Lastpk stays nil until a value is supplied by the caller or stored
// through setCopyState). rule is the filter rule that applies to the table; in
// buildTablePlan it is built from the result of matchTable.
type tablePlan struct {
	tablePK *binlogdatapb.TableLastPK
	rule    *binlogdatapb.Rule
}
    48  
// uvstreamer combines a table copy phase with binlog streaming: Stream first
// copies every table that has a plan and then hands over to a vstreamer that
// replicates from the current position.
type uvstreamer struct {
	// ctx is the cancellable context created in newUVStreamer; cancel is its
	// cancel function and is invoked by Cancel.
	ctx    context.Context
	cancel func()

	// Input parameters, stored as passed to newUVStreamer. send is the
	// caller's callback wrapped so that events are counted and stamped with
	// the engine's keyspace and shard.
	vse        *Engine
	send       func([]*binlogdatapb.VEvent) error
	cp         dbconfigs.Connector
	se         *schema.Engine
	startPos   string
	filter     *binlogdatapb.Filter
	inTablePKs []*binlogdatapb.TableLastPK

	// vschema is read and replaced under mu (see getVSchema / SetVSchema).
	vschema *localVSchema

	// plans holds the tables remaining to be fully copied, keyed by table
	// name; it is depleted as each table gets completely copied.
	// tablesToCopy lists the same table names in sorted order; copyComplete
	// drops its first element.
	plans        map[string]*tablePlan
	tablesToCopy []string

	// Field metadata for the table being copied.
	// NOTE(review): not referenced in this file; presumably maintained by the
	// copy code elsewhere in the package - confirm.
	fields   []*querypb.Field
	pkfields []*querypb.Field

	// pos is the current position in the binlog for this streamer.
	pos mysql.Position

	// stopPos is used by fast forward to stop replicating up to the point of
	// the last snapshot: send2 reports io.EOF once pos has reached a non-zero
	// stopPos.
	stopPos mysql.Position

	// lastTimestampNs is the last event timestamp seen so far, in nanoseconds.
	// ReplicationLagSeconds is derived from it in send2 and is accessed through
	// getReplicationLagSeconds / setReplicationLagSeconds.
	lastTimestampNs       int64
	ReplicationLagSeconds int64
	mu                    sync.Mutex // held while accessing vs, vschema and ReplicationLagSeconds via their accessors

	// config holds the tunables initialised in newUVStreamer.
	config *uvstreamerConfig

	vs *vstreamer // last vstreamer created in uvstreamer (set by Stream through setVs)
}
    87  
// uvstreamerConfig groups the timing tunables of a uvstreamer. newUVStreamer
// initialises MaxReplicationLag to one nanosecond and CatchupRetryTime to one
// second.
// NOTE(review): neither field is read in this file; presumably they are
// consumed by the catchup logic elsewhere in the package - confirm.
type uvstreamerConfig struct {
	MaxReplicationLag time.Duration
	CatchupRetryTime  time.Duration
}
    92  
    93  func newUVStreamer(ctx context.Context, vse *Engine, cp dbconfigs.Connector, se *schema.Engine, startPos string, tablePKs []*binlogdatapb.TableLastPK, filter *binlogdatapb.Filter, vschema *localVSchema, send func([]*binlogdatapb.VEvent) error) *uvstreamer {
    94  	ctx, cancel := context.WithCancel(ctx)
    95  	config := &uvstreamerConfig{
    96  		MaxReplicationLag: 1 * time.Nanosecond,
    97  		CatchupRetryTime:  1 * time.Second,
    98  	}
    99  	send2 := func(evs []*binlogdatapb.VEvent) error {
   100  		vse.vstreamerEventsStreamed.Add(int64(len(evs)))
   101  		for _, ev := range evs {
   102  			ev.Keyspace = vse.keyspace
   103  			ev.Shard = vse.shard
   104  		}
   105  		return send(evs)
   106  	}
   107  	uvs := &uvstreamer{
   108  		ctx:        ctx,
   109  		cancel:     cancel,
   110  		vse:        vse,
   111  		send:       send2,
   112  		cp:         cp,
   113  		se:         se,
   114  		startPos:   startPos,
   115  		filter:     filter,
   116  		vschema:    vschema,
   117  		config:     config,
   118  		inTablePKs: tablePKs,
   119  	}
   120  
   121  	return uvs
   122  }
   123  
   124  // buildTablePlan identifies the tables for the copy phase and creates the plans which consist of the lastPK seen
   125  // for a table and its Rule (for filtering purposes by the vstreamer engine)
   126  // it can be called
   127  //
   128  //	the first time, with just the filter and an empty pos
   129  //	during a restart, with both the filter and list of TableLastPK from the vgtid
   130  func (uvs *uvstreamer) buildTablePlan() error {
   131  	uvs.plans = make(map[string]*tablePlan)
   132  	tableLastPKs := make(map[string]*binlogdatapb.TableLastPK)
   133  	for _, tablePK := range uvs.inTablePKs {
   134  		tableLastPKs[tablePK.TableName] = tablePK
   135  	}
   136  	tables := uvs.se.GetSchema()
   137  	for range tables {
   138  		for _, rule := range uvs.filter.Rules {
   139  			if !strings.HasPrefix(rule.Match, "/") {
   140  				_, ok := tables[rule.Match]
   141  				if !ok {
   142  					return fmt.Errorf("table %s is not present in the database", rule.Match)
   143  				}
   144  			}
   145  		}
   146  	}
   147  	for tableName := range tables {
   148  		rule, err := matchTable(tableName, uvs.filter, tables)
   149  		if err != nil {
   150  			return err
   151  		}
   152  		if rule == nil {
   153  			continue
   154  		}
   155  		plan := &tablePlan{
   156  			tablePK: nil,
   157  			rule: &binlogdatapb.Rule{
   158  				Filter: rule.Filter,
   159  				Match:  rule.Match,
   160  			},
   161  		}
   162  		tablePK, ok := tableLastPKs[tableName]
   163  		if !ok {
   164  			tablePK = &binlogdatapb.TableLastPK{
   165  				TableName: tableName,
   166  				Lastpk:    nil,
   167  			}
   168  		}
   169  		plan.tablePK = tablePK
   170  		uvs.plans[tableName] = plan
   171  		uvs.tablesToCopy = append(uvs.tablesToCopy, tableName)
   172  
   173  	}
   174  	sort.Strings(uvs.tablesToCopy)
   175  	return nil
   176  }
   177  
   178  // check which rule matches table, validate table is in schema
   179  func matchTable(tableName string, filter *binlogdatapb.Filter, tables map[string]*schema.Table) (*binlogdatapb.Rule, error) {
   180  	if tableName == "dual" {
   181  		return nil, nil
   182  	}
   183  	found := false
   184  	for _, rule := range filter.Rules {
   185  
   186  		switch {
   187  		case tableName == rule.Match:
   188  			found = true
   189  		case strings.HasPrefix(rule.Match, "/"):
   190  			expr := strings.Trim(rule.Match, "/")
   191  			result, err := regexp.MatchString(expr, tableName)
   192  			if err != nil {
   193  				return nil, err
   194  			}
   195  			if !result {
   196  				continue
   197  			}
   198  			found = true
   199  		}
   200  		if found {
   201  			return &binlogdatapb.Rule{
   202  				Match:  tableName,
   203  				Filter: getQuery(tableName, rule.Filter),
   204  			}, nil
   205  		}
   206  	}
   207  
   208  	return nil, nil
   209  }
   210  
   211  // generate equivalent select statement if filter is empty or a keyrange.
   212  func getQuery(tableName string, filter string) string {
   213  	query := filter
   214  	switch {
   215  	case filter == "":
   216  		buf := sqlparser.NewTrackedBuffer(nil)
   217  		buf.Myprintf("select * from %v", sqlparser.NewIdentifierCS(tableName))
   218  		query = buf.String()
   219  	case key.IsKeyRange(filter):
   220  		buf := sqlparser.NewTrackedBuffer(nil)
   221  		buf.Myprintf("select * from %v where in_keyrange(%v)",
   222  			sqlparser.NewIdentifierCS(tableName), sqlparser.NewStrLiteral(filter))
   223  		query = buf.String()
   224  	}
   225  	return query
   226  }
   227  
// Cancel logs that the stream is being cancelled and then calls the cancel
// function of the context created for this uvstreamer in newUVStreamer.
func (uvs *uvstreamer) Cancel() {
	log.Infof("uvstreamer context is being cancelled")
	uvs.cancel()
}
   232  
// isRowCopied is meant to report whether the row an event refers to has
// already been copied. That logic is not implemented yet, so it ignores both
// arguments and always returns true: all events for the table are sent and
// none are missed.
func (uvs *uvstreamer) isRowCopied(tableName string, ev *binlogdatapb.VEvent) bool {
	return true
}
   238  
   239  // Only send catchup/fastforward events for tables whose copy phase is complete or in progress.
   240  // This ensures we fulfill the at-least-once delivery semantics for events.
   241  // TODO: filter out events for rows not yet copied. Note that we can only do this as a best-effort
   242  // for comparable PKs.
   243  func (uvs *uvstreamer) shouldSendEventForTable(tableName string, ev *binlogdatapb.VEvent) bool {
   244  	table, ok := uvs.plans[tableName]
   245  	// Event is for a table which is not in its copy phase.
   246  	if !ok {
   247  		return true
   248  	}
   249  
   250  	// if table copy was not started and no tablePK was specified we can ignore catchup/fastforward events for it
   251  	if table.tablePK == nil || table.tablePK.Lastpk == nil {
   252  		return false
   253  	}
   254  
   255  	// Table is currently in its copy phase. We have not yet implemented the logic to
   256  	// check if an event is for a row that is already copied, so we always return true
   257  	// there so that we don't miss events.
   258  	// We may send duplicate insert events or update/delete events for rows not yet seen
   259  	// to the client for the table being copied. This is ok as the client is expected to be
   260  	// idempotent: we only promise at-least-once semantics for VStream API (not exactly-once).
   261  	// Aside: vreplication workflows handle at-least-once by adding where clauses that render
   262  	// DML queries, related to events for rows not yet copied, as no-ops.
   263  	return uvs.isRowCopied(tableName, ev)
   264  }
   265  
   266  // Do not send internal heartbeat events. Filter out events for tables whose copy has not been started.
   267  func (uvs *uvstreamer) filterEvents(evs []*binlogdatapb.VEvent) []*binlogdatapb.VEvent {
   268  	if len(uvs.plans) == 0 {
   269  		return evs
   270  	}
   271  	var evs2 []*binlogdatapb.VEvent
   272  	var tableName string
   273  	var shouldSend bool
   274  
   275  	for _, ev := range evs {
   276  		switch ev.Type {
   277  		case binlogdatapb.VEventType_ROW:
   278  			tableName = ev.RowEvent.TableName
   279  		case binlogdatapb.VEventType_FIELD:
   280  			tableName = ev.FieldEvent.TableName
   281  		default:
   282  			tableName = ""
   283  		}
   284  		switch ev.Type {
   285  		case binlogdatapb.VEventType_HEARTBEAT:
   286  			shouldSend = false
   287  		default:
   288  			shouldSend = uvs.shouldSendEventForTable(tableName, ev)
   289  		}
   290  
   291  		if shouldSend {
   292  			evs2 = append(evs2, ev)
   293  		}
   294  	}
   295  	return evs2
   296  }
   297  
   298  // wraps the send parameter and filters events. called by fastforward/catchup
   299  func (uvs *uvstreamer) send2(evs []*binlogdatapb.VEvent) error {
   300  	if len(evs) == 0 {
   301  		return nil
   302  	}
   303  	ev := evs[len(evs)-1]
   304  	if ev.Timestamp != 0 {
   305  		uvs.lastTimestampNs = ev.Timestamp * 1e9
   306  	}
   307  	behind := time.Now().UnixNano() - uvs.lastTimestampNs
   308  	uvs.setReplicationLagSeconds(behind / 1e9)
   309  	//log.Infof("sbm set to %d", uvs.ReplicationLagSeconds)
   310  	var evs2 []*binlogdatapb.VEvent
   311  	if len(uvs.plans) > 0 {
   312  		evs2 = uvs.filterEvents(evs)
   313  	}
   314  	err := uvs.send(evs2)
   315  	if err != nil && err != io.EOF {
   316  		return err
   317  	}
   318  	for _, ev := range evs2 {
   319  		if ev.Type == binlogdatapb.VEventType_GTID {
   320  			uvs.pos, _ = mysql.DecodePosition(ev.Gtid)
   321  			if !uvs.stopPos.IsZero() && uvs.pos.AtLeast(uvs.stopPos) {
   322  				err = io.EOF
   323  			}
   324  		}
   325  	}
   326  	if err != nil {
   327  		uvs.vse.errorCounts.Add("Send", 1)
   328  	}
   329  	return err
   330  }
   331  
   332  func (uvs *uvstreamer) sendEventsForCurrentPos() error {
   333  	log.Infof("sendEventsForCurrentPos")
   334  	evs := []*binlogdatapb.VEvent{{
   335  		Type: binlogdatapb.VEventType_GTID,
   336  		Gtid: mysql.EncodePosition(uvs.pos),
   337  	}, {
   338  		Type: binlogdatapb.VEventType_OTHER,
   339  	}}
   340  	if err := uvs.send(evs); err != nil {
   341  		return wrapError(err, uvs.pos, uvs.vse)
   342  	}
   343  	return nil
   344  }
   345  
   346  func (uvs *uvstreamer) setStreamStartPosition() error {
   347  	curPos, err := uvs.currentPosition()
   348  	if err != nil {
   349  		return vterrors.Wrap(err, "could not obtain current position")
   350  	}
   351  	if uvs.startPos == "current" {
   352  		uvs.pos = curPos
   353  		if err := uvs.sendEventsForCurrentPos(); err != nil {
   354  			return err
   355  		}
   356  		return nil
   357  	}
   358  	pos, err := mysql.DecodePosition(uvs.startPos)
   359  	if err != nil {
   360  		return vterrors.Wrap(err, "could not decode position")
   361  	}
   362  	if !curPos.AtLeast(pos) {
   363  		uvs.vse.errorCounts.Add("GTIDSet Mismatch", 1)
   364  		return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT,
   365  			"GTIDSet Mismatch: requested source position:%v, current target vrep position: %v",
   366  			mysql.EncodePosition(pos), mysql.EncodePosition(curPos))
   367  	}
   368  	uvs.pos = pos
   369  	return nil
   370  }
   371  
   372  func (uvs *uvstreamer) currentPosition() (mysql.Position, error) {
   373  	conn, err := uvs.cp.Connect(uvs.ctx)
   374  	if err != nil {
   375  		return mysql.Position{}, err
   376  	}
   377  	defer conn.Close()
   378  	return conn.PrimaryPosition()
   379  }
   380  
   381  // Possible states:
   382  // 1. TablePKs nil, startPos set to gtid or "current" => start replicating from pos
   383  // 2. TablePKs nil, startPos empty => full table copy of tables matching filter
   384  // 3. TablePKs not nil, startPos empty => table copy (for pks > lastPK)
   385  // 4. TablePKs not nil, startPos set => run catchup from startPos, then table copy  (for pks > lastPK)
   386  func (uvs *uvstreamer) init() error {
   387  	if uvs.startPos == "" /* full copy */ || len(uvs.inTablePKs) > 0 /* resume copy */ {
   388  		if err := uvs.buildTablePlan(); err != nil {
   389  			return err
   390  		}
   391  	}
   392  	if uvs.startPos != "" {
   393  		if err := uvs.setStreamStartPosition(); err != nil {
   394  			return err
   395  		}
   396  	}
   397  	if uvs.pos.IsZero() && (len(uvs.plans) == 0) {
   398  		return fmt.Errorf("stream needs a position or a table to copy")
   399  	}
   400  	return nil
   401  }
   402  
   403  // Stream streams binlog events.
   404  func (uvs *uvstreamer) Stream() error {
   405  	log.Info("Stream() called")
   406  	if err := uvs.init(); err != nil {
   407  		return err
   408  	}
   409  	if len(uvs.plans) > 0 {
   410  		log.Info("TablePKs is not nil: starting vs.copy()")
   411  		if err := uvs.copy(uvs.ctx); err != nil {
   412  			log.Infof("uvstreamer.Stream() copy returned with err %s", err)
   413  			uvs.vse.errorCounts.Add("Copy", 1)
   414  			return err
   415  		}
   416  		if err := uvs.allCopyComplete(); err != nil {
   417  			return err
   418  		}
   419  	}
   420  	vs := newVStreamer(uvs.ctx, uvs.cp, uvs.se, mysql.EncodePosition(uvs.pos), mysql.EncodePosition(uvs.stopPos),
   421  		uvs.filter, uvs.getVSchema(), uvs.send, "replicate", uvs.vse)
   422  
   423  	uvs.setVs(vs)
   424  	return vs.Stream()
   425  }
   426  
// lock acquires uvs.mu. The msg argument names the caller and is currently
// unused.
func (uvs *uvstreamer) lock(msg string) {
	uvs.mu.Lock()
}
   430  
// unlock releases uvs.mu. The msg argument names the caller and is currently
// unused.
func (uvs *uvstreamer) unlock(msg string) {
	uvs.mu.Unlock()
}
   434  
   435  func (uvs *uvstreamer) setVs(vs *vstreamer) {
   436  	uvs.lock("setVs")
   437  	defer uvs.unlock("setVs")
   438  	uvs.vs = vs
   439  }
   440  
   441  // SetVSchema updates the vstreamer against the new vschema.
   442  func (uvs *uvstreamer) SetVSchema(vschema *localVSchema) {
   443  	uvs.lock("SetVSchema")
   444  	defer uvs.unlock("SetVSchema")
   445  	uvs.vschema = vschema
   446  	if uvs.vs != nil {
   447  		uvs.vs.SetVSchema(vschema)
   448  	}
   449  }
   450  
   451  func (uvs *uvstreamer) getVSchema() *localVSchema {
   452  	uvs.lock("getVSchema")
   453  	defer uvs.unlock("getVSchema")
   454  	return uvs.vschema
   455  }
   456  
   457  func (uvs *uvstreamer) setCopyState(tableName string, qr *querypb.QueryResult) {
   458  	uvs.plans[tableName].tablePK.Lastpk = qr
   459  }
   460  
   461  func (uvs *uvstreamer) allCopyComplete() error {
   462  	ev := &binlogdatapb.VEvent{
   463  		Type: binlogdatapb.VEventType_COPY_COMPLETED,
   464  	}
   465  
   466  	if err := uvs.send([]*binlogdatapb.VEvent{ev}); err != nil {
   467  		return err
   468  	}
   469  	return nil
   470  }
   471  
   472  // dummy event sent only in test mode
   473  func (uvs *uvstreamer) sendTestEvent(msg string) {
   474  	if !uvstreamerTestMode {
   475  		return
   476  	}
   477  	ev := &binlogdatapb.VEvent{
   478  		Type: binlogdatapb.VEventType_OTHER,
   479  		Gtid: msg,
   480  	}
   481  
   482  	if err := uvs.send([]*binlogdatapb.VEvent{ev}); err != nil {
   483  		return
   484  	}
   485  }
   486  
   487  func (uvs *uvstreamer) copyComplete(tableName string) error {
   488  	evs := []*binlogdatapb.VEvent{
   489  		{Type: binlogdatapb.VEventType_BEGIN},
   490  		{
   491  			Type: binlogdatapb.VEventType_LASTPK,
   492  			LastPKEvent: &binlogdatapb.LastPKEvent{
   493  				TableLastPK: &binlogdatapb.TableLastPK{
   494  					TableName: tableName,
   495  					Lastpk:    nil,
   496  				},
   497  				Completed: true,
   498  			},
   499  		},
   500  		{Type: binlogdatapb.VEventType_COMMIT},
   501  	}
   502  	if err := uvs.send(evs); err != nil {
   503  		return err
   504  	}
   505  
   506  	delete(uvs.plans, tableName)
   507  	uvs.tablesToCopy = uvs.tablesToCopy[1:]
   508  	return nil
   509  }
   510  
   511  func (uvs *uvstreamer) setPosition(gtid string, isInTx bool) error {
   512  	if gtid == "" {
   513  		return fmt.Errorf("empty gtid passed to setPosition")
   514  	}
   515  	pos, err := mysql.DecodePosition(gtid)
   516  	if err != nil {
   517  		return err
   518  	}
   519  	if pos.Equal(uvs.pos) {
   520  		return nil
   521  	}
   522  	gtidEvent := &binlogdatapb.VEvent{
   523  		Type:     binlogdatapb.VEventType_GTID,
   524  		Gtid:     gtid,
   525  		Keyspace: uvs.vse.keyspace,
   526  		Shard:    uvs.vse.shard,
   527  	}
   528  
   529  	var evs []*binlogdatapb.VEvent
   530  	if !isInTx {
   531  		evs = append(evs, &binlogdatapb.VEvent{
   532  			Type:     binlogdatapb.VEventType_BEGIN,
   533  			Keyspace: uvs.vse.keyspace,
   534  			Shard:    uvs.vse.shard,
   535  		})
   536  	}
   537  	evs = append(evs, gtidEvent)
   538  	if !isInTx {
   539  		evs = append(evs, &binlogdatapb.VEvent{
   540  			Type:     binlogdatapb.VEventType_COMMIT,
   541  			Keyspace: uvs.vse.keyspace,
   542  			Shard:    uvs.vse.shard,
   543  		})
   544  	}
   545  	if err := uvs.send(evs); err != nil {
   546  		return err
   547  	}
   548  	uvs.pos = pos
   549  	return nil
   550  }
   551  
   552  func (uvs *uvstreamer) getReplicationLagSeconds() int64 {
   553  	uvs.mu.Lock()
   554  	defer uvs.mu.Unlock()
   555  	return uvs.ReplicationLagSeconds
   556  }
   557  
   558  func (uvs *uvstreamer) setReplicationLagSeconds(sbm int64) {
   559  	uvs.mu.Lock()
   560  	defer uvs.mu.Unlock()
   561  	uvs.ReplicationLagSeconds = sbm
   562  }