vitess.io/vitess@v0.16.2/go/vt/binlog/event_streamer.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package binlog
    18  
    19  import (
    20  	"encoding/base64"
    21  	"fmt"
    22  	"strconv"
    23  	"strings"
    24  
    25  	"context"
    26  
    27  	"vitess.io/vitess/go/mysql"
    28  	"vitess.io/vitess/go/sqltypes"
    29  	"vitess.io/vitess/go/vt/dbconfigs"
    30  	"vitess.io/vitess/go/vt/log"
    31  	"vitess.io/vitess/go/vt/sqlparser"
    32  	"vitess.io/vitess/go/vt/vttablet/tabletserver/schema"
    33  
    34  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    35  	querypb "vitess.io/vitess/go/vt/proto/query"
    36  )
    37  
var (
	// binlogSetInsertID is the prefix of the binlog SET statement that
	// carries the auto-increment seed for the inserts that follow it.
	binlogSetInsertID    = "SET INSERT_ID="
	binlogSetInsertIDLen = len(binlogSetInsertID)
	// streamCommentStart marks the beginning of the _stream comment that
	// SBR DML statements append to describe their primary key values.
	streamCommentStart    = "/* _stream "
	streamCommentStartLen = len(streamCommentStart)
)
    44  
// sendEventFunc is the callback invoked for each converted StreamEvent.
type sendEventFunc func(event *querypb.StreamEvent) error

// EventStreamer is an adapter on top of a binlog Streamer that converts
// the events into StreamEvent objects.
type EventStreamer struct {
	bls       *Streamer     // underlying binlog streamer whose transactions are converted
	sendEvent sendEventFunc // called once per converted transaction
}
    53  
    54  // NewEventStreamer returns a new EventStreamer on top of a Streamer
    55  func NewEventStreamer(cp dbconfigs.Connector, se *schema.Engine, startPos mysql.Position, timestamp int64, sendEvent sendEventFunc) *EventStreamer {
    56  	evs := &EventStreamer{
    57  		sendEvent: sendEvent,
    58  	}
    59  	evs.bls = NewStreamer(cp, se, nil, startPos, timestamp, evs.transactionToEvent)
    60  	evs.bls.extractPK = true
    61  	return evs
    62  }
    63  
// Stream starts streaming updates from the underlying binlog Streamer.
// It blocks until the context is canceled or the stream terminates.
func (evs *EventStreamer) Stream(ctx context.Context) error {
	return evs.bls.Stream(ctx)
}
    68  
    69  func (evs *EventStreamer) transactionToEvent(eventToken *querypb.EventToken, statements []FullBinlogStatement) error {
    70  	event := &querypb.StreamEvent{
    71  		EventToken: eventToken,
    72  	}
    73  	var err error
    74  	var insertid int64
    75  	for _, stmt := range statements {
    76  		switch stmt.Statement.Category {
    77  		case binlogdatapb.BinlogTransaction_Statement_BL_SET:
    78  			sql := string(stmt.Statement.Sql)
    79  			if strings.HasPrefix(sql, binlogSetInsertID) {
    80  				insertid, err = strconv.ParseInt(sql[binlogSetInsertIDLen:], 10, 64)
    81  				if err != nil {
    82  					binlogStreamerErrors.Add("EventStreamer", 1)
    83  					log.Errorf("%v: %s", err, sql)
    84  				}
    85  			}
    86  		case binlogdatapb.BinlogTransaction_Statement_BL_INSERT,
    87  			binlogdatapb.BinlogTransaction_Statement_BL_UPDATE,
    88  			binlogdatapb.BinlogTransaction_Statement_BL_DELETE:
    89  			var dmlStatement *querypb.StreamEvent_Statement
    90  			dmlStatement, insertid, err = evs.buildDMLStatement(stmt, insertid)
    91  			if err != nil {
    92  				dmlStatement = &querypb.StreamEvent_Statement{
    93  					Category: querypb.StreamEvent_Statement_Error,
    94  					Sql:      stmt.Statement.Sql,
    95  				}
    96  			}
    97  			event.Statements = append(event.Statements, dmlStatement)
    98  		case binlogdatapb.BinlogTransaction_Statement_BL_DDL:
    99  			ddlStatement := &querypb.StreamEvent_Statement{
   100  				Category: querypb.StreamEvent_Statement_DDL,
   101  				Sql:      stmt.Statement.Sql,
   102  			}
   103  			event.Statements = append(event.Statements, ddlStatement)
   104  		case binlogdatapb.BinlogTransaction_Statement_BL_UNRECOGNIZED:
   105  			unrecognized := &querypb.StreamEvent_Statement{
   106  				Category: querypb.StreamEvent_Statement_Error,
   107  				Sql:      stmt.Statement.Sql,
   108  			}
   109  			event.Statements = append(event.Statements, unrecognized)
   110  		default:
   111  			binlogStreamerErrors.Add("EventStreamer", 1)
   112  			log.Errorf("Unrecognized event: %v: %s", stmt.Statement.Category, stmt.Statement.Sql)
   113  		}
   114  	}
   115  	return evs.sendEvent(event)
   116  }
   117  
   118  /*
   119  buildDMLStatement recovers the PK from a FullBinlogStatement.
   120  For RBR, the values are already in there, just need to be translated.
   121  For SBR, parses the tuples of the full stream comment.
   122  The _stream comment is extracted into a StreamEvent.Statement.
   123  */
   124  // Example query: insert into _table_(foo) values ('foo') /* _stream _table_ (eid id name ) (null 1 'bmFtZQ==' ); */
   125  // the "null" value is used for auto-increment columns.
   126  func (evs *EventStreamer) buildDMLStatement(stmt FullBinlogStatement, insertid int64) (*querypb.StreamEvent_Statement, int64, error) {
   127  	// For RBR events, we know all this already, just extract it.
   128  	if stmt.PKNames != nil {
   129  		// We get an array of []sqltypes.Value, need to convert to querypb.Row.
   130  		dmlStatement := &querypb.StreamEvent_Statement{
   131  			Category:         querypb.StreamEvent_Statement_DML,
   132  			TableName:        stmt.Table,
   133  			PrimaryKeyFields: stmt.PKNames,
   134  			PrimaryKeyValues: []*querypb.Row{sqltypes.RowToProto3(stmt.PKValues)},
   135  		}
   136  		// InsertID is only needed to fill in the ID on next queries,
   137  		// but if we use RBR, it's already in the values, so just return 0.
   138  		return dmlStatement, 0, nil
   139  	}
   140  
   141  	sql := string(stmt.Statement.Sql)
   142  
   143  	// first extract the comment
   144  	commentIndex := strings.LastIndex(sql, streamCommentStart)
   145  	if commentIndex == -1 {
   146  		return nil, insertid, fmt.Errorf("missing stream comment")
   147  	}
   148  	dmlComment := sql[commentIndex+streamCommentStartLen:]
   149  
   150  	// then start building the response
   151  	dmlStatement := &querypb.StreamEvent_Statement{
   152  		Category: querypb.StreamEvent_Statement_DML,
   153  	}
   154  	tokenizer := sqlparser.NewStringTokenizer(dmlComment)
   155  
   156  	// first parse the table name
   157  	typ, val := tokenizer.Scan()
   158  	if typ != sqlparser.ID {
   159  		return nil, insertid, fmt.Errorf("expecting table name in stream comment")
   160  	}
   161  	dmlStatement.TableName = string(val)
   162  
   163  	// then parse the PK names
   164  	var err error
   165  	dmlStatement.PrimaryKeyFields, err = parsePkNames(tokenizer)
   166  	hasNegatives := make([]bool, len(dmlStatement.PrimaryKeyFields))
   167  	if err != nil {
   168  		return nil, insertid, err
   169  	}
   170  
   171  	// then parse the PK values, one at a time
   172  	for typ, _ = tokenizer.Scan(); typ != ';'; typ, _ = tokenizer.Scan() {
   173  		switch typ {
   174  		case '(':
   175  			// pkTuple is a list of pk values
   176  			var pkTuple *querypb.Row
   177  			pkTuple, insertid, err = parsePkTuple(tokenizer, insertid, dmlStatement.PrimaryKeyFields, hasNegatives)
   178  			if err != nil {
   179  				return nil, insertid, err
   180  			}
   181  			dmlStatement.PrimaryKeyValues = append(dmlStatement.PrimaryKeyValues, pkTuple)
   182  		default:
   183  			return nil, insertid, fmt.Errorf("expecting '('")
   184  		}
   185  	}
   186  
   187  	return dmlStatement, insertid, nil
   188  }
   189  
   190  // parsePkNames parses something like (eid id name )
   191  func parsePkNames(tokenizer *sqlparser.Tokenizer) ([]*querypb.Field, error) {
   192  	var columns []*querypb.Field
   193  	if typ, _ := tokenizer.Scan(); typ != '(' {
   194  		return nil, fmt.Errorf("expecting '('")
   195  	}
   196  	for typ, val := tokenizer.Scan(); typ != ')'; typ, val = tokenizer.Scan() {
   197  		switch typ {
   198  		case sqlparser.ID:
   199  			columns = append(columns, &querypb.Field{
   200  				Name: string(val),
   201  			})
   202  		default:
   203  			return nil, fmt.Errorf("syntax error at position: %d", tokenizer.Pos)
   204  		}
   205  	}
   206  	return columns, nil
   207  }
   208  
// parsePkTuple parses something like (null 1 'bmFtZQ==' ). For numbers, the default
// type is Int64. If an unsigned number that can't fit in an int64 is seen, then the
// type is set to Uint64. In such cases, if a negative number was previously seen, the
// function returns an error.
//
// It returns the decoded row, the updated insertid (advanced once per NULL
// consumed), and any parse error. As a side effect it refines fields[i].Type
// and records seen negatives in hasNegatives.
func parsePkTuple(tokenizer *sqlparser.Tokenizer, insertid int64, fields []*querypb.Field, hasNegatives []bool) (*querypb.Row, int64, error) {
	result := &querypb.Row{}

	// index is the PK column the next scanned value belongs to.
	index := 0
	for typ, val := tokenizer.Scan(); typ != ')'; typ, val = tokenizer.Scan() {
		// More values than declared PK columns: malformed tuple.
		if index >= len(fields) {
			return nil, insertid, fmt.Errorf("length mismatch in values")
		}

		switch typ {
		case '-':
			// Negative number: the tokenizer returns the sign and the digits
			// as separate tokens, so scan the integral part and re-join.
			hasNegatives[index] = true
			typ2, val2 := tokenizer.Scan()
			if typ2 != sqlparser.INTEGRAL {
				return nil, insertid, fmt.Errorf("expecting number after '-'")
			}
			fullVal := append([]byte{'-'}, val2...)
			// Validate that the joined value fits in an int64.
			if _, err := strconv.ParseInt(string(fullVal), 0, 64); err != nil {
				return nil, insertid, err
			}
			// A negative value pins the column type to Int64.
			switch fields[index].Type {
			case sqltypes.Null:
				fields[index].Type = sqltypes.Int64
			case sqltypes.Int64:
				// no-op
			default:
				return nil, insertid, fmt.Errorf("incompatible negative number field with type %v", fields[index].Type)
			}

			result.Lengths = append(result.Lengths, int64(len(fullVal)))
			result.Values = append(result.Values, fullVal...)
		case sqlparser.INTEGRAL:
			unsigned, err := strconv.ParseUint(string(val), 0, 64)
			if err != nil {
				return nil, insertid, err
			}
			if unsigned > uint64(9223372036854775807) {
				// Number is a uint64 that can't fit in an int64 (> MaxInt64),
				// so the column must be promoted to Uint64 — unless a negative
				// value was already seen in this column, which is incompatible.
				if hasNegatives[index] {
					return nil, insertid, fmt.Errorf("incompatible unsigned number field with type %v", fields[index].Type)
				}
				switch fields[index].Type {
				case sqltypes.Null, sqltypes.Int64:
					fields[index].Type = sqltypes.Uint64
				case sqltypes.Uint64:
					// no-op
				default:
					return nil, insertid, fmt.Errorf("incompatible number field with type %v", fields[index].Type)
				}
			} else {
				// Could be int64 or uint64. Default new columns to Int64 and
				// accept either previously-assigned integer type.
				switch fields[index].Type {
				case sqltypes.Null:
					fields[index].Type = sqltypes.Int64
				case sqltypes.Int64, sqltypes.Uint64:
					// no-op
				default:
					return nil, insertid, fmt.Errorf("incompatible number field with type %v", fields[index].Type)
				}
			}

			result.Lengths = append(result.Lengths, int64(len(val)))
			result.Values = append(result.Values, val...)
		case sqlparser.NULL:
			// NULL marks an auto-increment column: substitute the current
			// insert id and advance it for the next row of this statement.
			switch fields[index].Type {
			case sqltypes.Null:
				fields[index].Type = sqltypes.Int64
			case sqltypes.Int64, sqltypes.Uint64:
				// no-op
			default:
				return nil, insertid, fmt.Errorf("incompatible auto-increment field with type %v", fields[index].Type)
			}

			v := strconv.AppendInt(nil, insertid, 10)
			result.Lengths = append(result.Lengths, int64(len(v)))
			result.Values = append(result.Values, v...)
			insertid++
		case sqlparser.STRING:
			// String values in the _stream comment are base64-encoded;
			// decode back to the raw bytes.
			switch fields[index].Type {
			case sqltypes.Null:
				fields[index].Type = sqltypes.VarBinary
			case sqltypes.VarBinary:
				// no-op
			default:
				return nil, insertid, fmt.Errorf("incompatible string field with type %v", fields[index].Type)
			}

			decoded, err := base64.StdEncoding.DecodeString(val)
			if err != nil {
				return nil, insertid, err
			}
			result.Lengths = append(result.Lengths, int64(len(decoded)))
			result.Values = append(result.Values, decoded...)
		default:
			return nil, insertid, fmt.Errorf("syntax error at position: %d", tokenizer.Pos)
		}
		index++
	}

	// Fewer values than declared PK columns: malformed tuple.
	if index != len(fields) {
		return nil, insertid, fmt.Errorf("length mismatch in values")
	}
	return result, insertid, nil
}