vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/vstreamer/rowstreamer.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vstreamer
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"vitess.io/vitess/go/mysql"
    26  	"vitess.io/vitess/go/mysql/collations"
    27  	"vitess.io/vitess/go/sqltypes"
    28  	"vitess.io/vitess/go/textutil"
    29  	"vitess.io/vitess/go/timer"
    30  	"vitess.io/vitess/go/vt/dbconfigs"
    31  	"vitess.io/vitess/go/vt/log"
    32  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    33  	querypb "vitess.io/vitess/go/vt/proto/query"
    34  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    35  	"vitess.io/vitess/go/vt/sqlparser"
    36  	"vitess.io/vitess/go/vt/vterrors"
    37  	"vitess.io/vitess/go/vt/vtgate/vindexes"
    38  	"vitess.io/vitess/go/vt/vttablet/tabletserver/schema"
    39  )
    40  
    41  var (
    42  	rowStreamertHeartbeatInterval = 10 * time.Second
    43  )
    44  
// RowStreamer exposes an externally usable interface to rowStreamer.
type RowStreamer interface {
	// Stream runs the row stream to completion and returns any error
	// encountered. In the rowStreamer implementation this builds the plan,
	// opens a snapshot connection and streams the query results.
	Stream() error
	// Cancel requests that the stream stop. In the rowStreamer
	// implementation this cancels the streamer's context.
	Cancel()
}
    50  
    51  // NewRowStreamer returns a RowStreamer
    52  func NewRowStreamer(ctx context.Context, cp dbconfigs.Connector, se *schema.Engine, query string, lastpk []sqltypes.Value, send func(*binlogdatapb.VStreamRowsResponse) error, vse *Engine) RowStreamer {
    53  	return newRowStreamer(ctx, cp, se, query, lastpk, &localVSchema{vschema: &vindexes.VSchema{}}, send, vse)
    54  }
    55  
// rowStreamer is used for copying the existing rows of a table
// before vreplication begins streaming binlogs. The rowStreamer
// responds to a request with the GTID position as of which it
// streams the rows of a table. This allows vreplication to synchronize
// its events as of the returned GTID before adding the new rows.
// For every set of rows sent, the last pk value is also sent.
// This allows for the streaming to be resumed based on the last
// pk value processed.
type rowStreamer struct {
	// ctx is derived from the caller's context in newRowStreamer;
	// cancel cancels it and is invoked by Cancel.
	ctx    context.Context
	cancel func()

	// Inputs supplied by the caller of newRowStreamer.
	// cp is used to open the snapshot connection, se to look up the table,
	// query is the select to stream, lastpk (if non-empty) is the pk value
	// to resume after, and send receives every response.
	cp      dbconfigs.Connector
	se      *schema.Engine
	query   string
	lastpk  []sqltypes.Value
	send    func(*binlogdatapb.VStreamRowsResponse) error
	vschema *localVSchema

	// State populated by buildPlan (plan, pkColumns, ukColumnNames,
	// sendQuery) plus the owning engine and the packet sizer.
	// pkColumns holds indexes into the table's fields; ukColumnNames comes
	// from the query's "ukColumns" comment directive; sendQuery is the
	// select statement produced by buildSelect.
	plan          *Plan
	pkColumns     []int
	ukColumnNames []string
	sendQuery     string
	vse           *Engine
	pktsize       PacketSizer

	// throttleResponseRateLimiter paces the "throttled" responses that
	// streamQuery sends while the throttler check is failing.
	throttleResponseRateLimiter *timer.RateLimiter
}
    84  
    85  func newRowStreamer(ctx context.Context, cp dbconfigs.Connector, se *schema.Engine, query string, lastpk []sqltypes.Value, vschema *localVSchema, send func(*binlogdatapb.VStreamRowsResponse) error, vse *Engine) *rowStreamer {
    86  	ctx, cancel := context.WithCancel(ctx)
    87  	return &rowStreamer{
    88  		ctx:     ctx,
    89  		cancel:  cancel,
    90  		cp:      cp,
    91  		se:      se,
    92  		query:   query,
    93  		lastpk:  lastpk,
    94  		send:    send,
    95  		vschema: vschema,
    96  		vse:     vse,
    97  		pktsize: DefaultPacketSizer(),
    98  
    99  		throttleResponseRateLimiter: timer.NewRateLimiter(rowStreamertHeartbeatInterval),
   100  	}
   101  }
   102  
// Cancel logs the call and cancels the context created for this streamer in
// newRowStreamer. streamQuery checks that context on every loop iteration
// and returns an error once it is done.
func (rs *rowStreamer) Cancel() {
	log.Info("Rowstreamer Cancel() called")
	rs.cancel()
}
   107  
   108  func (rs *rowStreamer) Stream() error {
   109  	// Ensure sh is Open. If vttablet came up in a non_serving role,
   110  	// the schema engine may not have been initialized.
   111  	if err := rs.se.Open(); err != nil {
   112  		return err
   113  	}
   114  	if err := rs.buildPlan(); err != nil {
   115  		return err
   116  	}
   117  	conn, err := snapshotConnect(rs.ctx, rs.cp)
   118  	if err != nil {
   119  		return err
   120  	}
   121  	defer conn.Close()
   122  	if _, err := conn.ExecuteFetch("set names binary", 1, false); err != nil {
   123  		return err
   124  	}
   125  	return rs.streamQuery(conn, rs.send)
   126  }
   127  
   128  func (rs *rowStreamer) buildPlan() error {
   129  	// This pre-parsing is required to extract the table name
   130  	// and create its metadata.
   131  	sel, fromTable, err := analyzeSelect(rs.query)
   132  	if err != nil {
   133  		return err
   134  	}
   135  
   136  	st, err := rs.se.GetTableForPos(fromTable, "")
   137  	if err != nil {
   138  		// There is a scenario where vstreamer's table state can be out-of-date, and this happens
   139  		// with vitess migrations, based on vreplication.
   140  		// Vitess migrations use an elaborate cut-over flow where tables are swapped away while traffic is
   141  		// being blocked. The RENAME flow is such that at some point the table is renamed away, leaving a
   142  		// "puncture"; this is an event the is captured by vstreamer. The completion of the flow fixes the
   143  		// puncture, and places a new table under the original table's name, but the way it is done does not
   144  		// cause vstreamer to refresh schema state.
   145  		// there is therefore a reproducable valid sequence of events where vstreamer thinks a table does not exist,
   146  		// where it in fact does exist.
   147  		// For this reason we give vstreamer a "second chance" to review the up-to-date state of the schema.
   148  		// In the future, we will reduce this operation to reading a single table rather than the entire schema.
   149  		rs.se.ReloadAt(context.Background(), mysql.Position{})
   150  		st, err = rs.se.GetTableForPos(fromTable, "")
   151  	}
   152  	if err != nil {
   153  		return err
   154  	}
   155  	ti := &Table{
   156  		Name:   st.Name,
   157  		Fields: st.Fields,
   158  	}
   159  	// The plan we build is identical to the one for vstreamer.
   160  	// This is because the row format of a read is identical
   161  	// to the row format of a binlog event. So, the same
   162  	// filtering will work.
   163  	rs.plan, err = buildTablePlan(ti, rs.vschema, rs.query)
   164  	if err != nil {
   165  		log.Errorf("%s", err.Error())
   166  		return err
   167  	}
   168  
   169  	directives := sel.Comments.Directives()
   170  	if s, found := directives.GetString("ukColumns", ""); found {
   171  		rs.ukColumnNames, err = textutil.SplitUnescape(s, ",")
   172  		if err != nil {
   173  			return err
   174  		}
   175  	}
   176  
   177  	rs.pkColumns, err = rs.buildPKColumns(st)
   178  	if err != nil {
   179  		return err
   180  	}
   181  	rs.sendQuery, err = rs.buildSelect()
   182  	if err != nil {
   183  		return err
   184  	}
   185  	return err
   186  }
   187  
   188  // buildPKColumnsFromUniqueKey assumes a unique key is indicated,
   189  func (rs *rowStreamer) buildPKColumnsFromUniqueKey() ([]int, error) {
   190  	var pkColumns = make([]int, 0)
   191  	// We wish to utilize a UNIQUE KEY which is not the PRIMARY KEY/
   192  
   193  	for _, colName := range rs.ukColumnNames {
   194  		index := rs.plan.Table.FindColumn(sqlparser.NewIdentifierCI(colName))
   195  		if index < 0 {
   196  			return pkColumns, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "column %v is listed as unique key, but not present in table %v", colName, rs.plan.Table.Name)
   197  		}
   198  		pkColumns = append(pkColumns, index)
   199  	}
   200  	return pkColumns, nil
   201  }
   202  
   203  func (rs *rowStreamer) buildPKColumns(st *binlogdatapb.MinimalTable) ([]int, error) {
   204  	if len(rs.ukColumnNames) > 0 {
   205  		return rs.buildPKColumnsFromUniqueKey()
   206  	}
   207  	var pkColumns = make([]int, 0)
   208  	if len(st.PKColumns) == 0 {
   209  		// Use a PK equivalent if one exists
   210  		pkColumns, err := rs.vse.mapPKEquivalentCols(rs.ctx, st)
   211  		if err == nil && len(pkColumns) != 0 {
   212  			return pkColumns, nil
   213  		}
   214  
   215  		// Fall back to using every column in the table if there's no PK or PKE
   216  		pkColumns = make([]int, len(st.Fields))
   217  		for i := range st.Fields {
   218  			pkColumns[i] = i
   219  		}
   220  		return pkColumns, nil
   221  	}
   222  	for _, pk := range st.PKColumns {
   223  		if pk >= int64(len(st.Fields)) {
   224  			return nil, fmt.Errorf("primary key %d refers to non-existent column", pk)
   225  		}
   226  		pkColumns = append(pkColumns, int(pk))
   227  	}
   228  	return pkColumns, nil
   229  }
   230  
   231  func (rs *rowStreamer) buildSelect() (string, error) {
   232  	buf := sqlparser.NewTrackedBuffer(nil)
   233  	// We could have used select *, but being explicit is more predictable.
   234  	buf.Myprintf("select ")
   235  	prefix := ""
   236  	for _, col := range rs.plan.Table.Fields {
   237  		if rs.plan.isConvertColumnUsingUTF8(col.Name) {
   238  			buf.Myprintf("%sconvert(%v using utf8mb4) as %v", prefix, sqlparser.NewIdentifierCI(col.Name), sqlparser.NewIdentifierCI(col.Name))
   239  		} else if funcExpr := rs.plan.getColumnFuncExpr(col.Name); funcExpr != nil {
   240  			buf.Myprintf("%s%s as %v", prefix, sqlparser.String(funcExpr), sqlparser.NewIdentifierCI(col.Name))
   241  		} else {
   242  			buf.Myprintf("%s%v", prefix, sqlparser.NewIdentifierCI(col.Name))
   243  		}
   244  		prefix = ", "
   245  	}
   246  	buf.Myprintf(" from %v", sqlparser.NewIdentifierCS(rs.plan.Table.Name))
   247  	if len(rs.lastpk) != 0 {
   248  		if len(rs.lastpk) != len(rs.pkColumns) {
   249  			return "", fmt.Errorf("primary key values don't match length: %v vs %v", rs.lastpk, rs.pkColumns)
   250  		}
   251  		buf.WriteString(" where ")
   252  		prefix := ""
   253  		// This loop handles the case for composite pks. For example,
   254  		// if lastpk was (1,2), the where clause would be:
   255  		// (col1 = 1 and col2 > 2) or (col1 > 1).
   256  		// A tuple inequality like (col1,col2) > (1,2) ends up
   257  		// being a full table scan for mysql.
   258  		for lastcol := len(rs.pkColumns) - 1; lastcol >= 0; lastcol-- {
   259  			buf.Myprintf("%s(", prefix)
   260  			prefix = " or "
   261  			for i, pk := range rs.pkColumns[:lastcol] {
   262  				buf.Myprintf("%v = ", sqlparser.NewIdentifierCI(rs.plan.Table.Fields[pk].Name))
   263  				rs.lastpk[i].EncodeSQL(buf)
   264  				buf.Myprintf(" and ")
   265  			}
   266  			buf.Myprintf("%v > ", sqlparser.NewIdentifierCI(rs.plan.Table.Fields[rs.pkColumns[lastcol]].Name))
   267  			rs.lastpk[lastcol].EncodeSQL(buf)
   268  			buf.Myprintf(")")
   269  		}
   270  	}
   271  	buf.Myprintf(" order by ", sqlparser.NewIdentifierCS(rs.plan.Table.Name))
   272  	prefix = ""
   273  	for _, pk := range rs.pkColumns {
   274  		buf.Myprintf("%s%v", prefix, sqlparser.NewIdentifierCI(rs.plan.Table.Fields[pk].Name))
   275  		prefix = ", "
   276  	}
   277  	return buf.String(), nil
   278  }
   279  
   280  func (rs *rowStreamer) streamQuery(conn *snapshotConn, send func(*binlogdatapb.VStreamRowsResponse) error) error {
   281  
   282  	var sendMu sync.Mutex
   283  	safeSend := func(r *binlogdatapb.VStreamRowsResponse) error {
   284  		sendMu.Lock()
   285  		defer sendMu.Unlock()
   286  		return send(r)
   287  	}
   288  	// Let's wait until MySQL is in good shape to stream rows
   289  	if err := rs.vse.waitForMySQL(rs.ctx, rs.cp, rs.plan.Table.Name); err != nil {
   290  		return err
   291  	}
   292  
   293  	log.Infof("Streaming query: %v\n", rs.sendQuery)
   294  	gtid, rotatedLog, err := conn.streamWithSnapshot(rs.ctx, rs.plan.Table.Name, rs.sendQuery)
   295  	if rotatedLog {
   296  		rs.vse.vstreamerFlushedBinlogs.Add(1)
   297  	}
   298  	if err != nil {
   299  		return err
   300  	}
   301  
   302  	// first call the callback with the fields
   303  	flds, err := conn.Fields()
   304  	if err != nil {
   305  		return err
   306  	}
   307  	pkfields := make([]*querypb.Field, len(rs.pkColumns))
   308  	for i, pk := range rs.pkColumns {
   309  		pkfields[i] = &querypb.Field{
   310  			Name: flds[pk].Name,
   311  			Type: flds[pk].Type,
   312  		}
   313  	}
   314  
   315  	charsets := make([]collations.ID, len(flds))
   316  	for i, fld := range flds {
   317  		charsets[i] = collations.ID(fld.Charset)
   318  	}
   319  
   320  	err = safeSend(&binlogdatapb.VStreamRowsResponse{
   321  		Fields:   rs.plan.fields(),
   322  		Pkfields: pkfields,
   323  		Gtid:     gtid,
   324  	})
   325  	if err != nil {
   326  		return fmt.Errorf("stream send error: %v", err)
   327  	}
   328  
   329  	// streamQuery sends heartbeats as long as it operates
   330  	heartbeatTicker := time.NewTicker(rowStreamertHeartbeatInterval)
   331  	defer heartbeatTicker.Stop()
   332  	go func() {
   333  		for range heartbeatTicker.C {
   334  			safeSend(&binlogdatapb.VStreamRowsResponse{Heartbeat: true})
   335  		}
   336  	}()
   337  
   338  	var response binlogdatapb.VStreamRowsResponse
   339  	var rows []*querypb.Row
   340  	var rowCount int
   341  	var mysqlrow []sqltypes.Value
   342  
   343  	filtered := make([]sqltypes.Value, len(rs.plan.ColExprs))
   344  	lastpk := make([]sqltypes.Value, len(rs.pkColumns))
   345  	byteCount := 0
   346  	for {
   347  		if rs.ctx.Err() != nil {
   348  			log.Infof("Stream ended because of ctx.Done")
   349  			return fmt.Errorf("stream ended: %v", rs.ctx.Err())
   350  		}
   351  
   352  		// check throttler.
   353  		if !rs.vse.throttlerClient.ThrottleCheckOKOrWait(rs.ctx) {
   354  			rs.throttleResponseRateLimiter.Do(func() error {
   355  				return safeSend(&binlogdatapb.VStreamRowsResponse{Throttled: true})
   356  			})
   357  			continue
   358  		}
   359  
   360  		if mysqlrow != nil {
   361  			mysqlrow = mysqlrow[:0]
   362  		}
   363  		mysqlrow, err = conn.FetchNext(mysqlrow)
   364  		if err != nil {
   365  			return err
   366  		}
   367  		if mysqlrow == nil {
   368  			break
   369  		}
   370  		// Compute lastpk here, because we'll need it
   371  		// at the end after the loop exits.
   372  		for i, pk := range rs.pkColumns {
   373  			lastpk[i] = mysqlrow[pk]
   374  		}
   375  		// Reuse the vstreamer's filter.
   376  		ok, err := rs.plan.filter(mysqlrow, filtered, charsets)
   377  		if err != nil {
   378  			return err
   379  		}
   380  		if ok {
   381  			if rowCount >= len(rows) {
   382  				rows = append(rows, &querypb.Row{})
   383  			}
   384  			byteCount += sqltypes.RowToProto3Inplace(filtered, rows[rowCount])
   385  			rowCount++
   386  		}
   387  
   388  		if rs.pktsize.ShouldSend(byteCount) {
   389  			response.Rows = rows[:rowCount]
   390  			response.Lastpk = sqltypes.RowToProto3(lastpk)
   391  
   392  			rs.vse.rowStreamerNumRows.Add(int64(len(response.Rows)))
   393  			rs.vse.rowStreamerNumPackets.Add(int64(1))
   394  			startSend := time.Now()
   395  			err = safeSend(&response)
   396  			if err != nil {
   397  				return err
   398  			}
   399  			rs.pktsize.Record(byteCount, time.Since(startSend))
   400  			rowCount = 0
   401  			byteCount = 0
   402  		}
   403  	}
   404  
   405  	if rowCount > 0 {
   406  		response.Rows = rows[:rowCount]
   407  		response.Lastpk = sqltypes.RowToProto3(lastpk)
   408  
   409  		rs.vse.rowStreamerNumRows.Add(int64(len(response.Rows)))
   410  		err = safeSend(&response)
   411  		if err != nil {
   412  			return err
   413  		}
   414  	}
   415  
   416  	return nil
   417  }