vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/vstreamer/rowstreamer.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vstreamer 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "vitess.io/vitess/go/mysql" 26 "vitess.io/vitess/go/mysql/collations" 27 "vitess.io/vitess/go/sqltypes" 28 "vitess.io/vitess/go/textutil" 29 "vitess.io/vitess/go/timer" 30 "vitess.io/vitess/go/vt/dbconfigs" 31 "vitess.io/vitess/go/vt/log" 32 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 33 querypb "vitess.io/vitess/go/vt/proto/query" 34 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 35 "vitess.io/vitess/go/vt/sqlparser" 36 "vitess.io/vitess/go/vt/vterrors" 37 "vitess.io/vitess/go/vt/vtgate/vindexes" 38 "vitess.io/vitess/go/vt/vttablet/tabletserver/schema" 39 ) 40 41 var ( 42 rowStreamertHeartbeatInterval = 10 * time.Second 43 ) 44 45 // RowStreamer exposes an externally usable interface to rowStreamer. 46 type RowStreamer interface { 47 Stream() error 48 Cancel() 49 } 50 51 // NewRowStreamer returns a RowStreamer 52 func NewRowStreamer(ctx context.Context, cp dbconfigs.Connector, se *schema.Engine, query string, lastpk []sqltypes.Value, send func(*binlogdatapb.VStreamRowsResponse) error, vse *Engine) RowStreamer { 53 return newRowStreamer(ctx, cp, se, query, lastpk, &localVSchema{vschema: &vindexes.VSchema{}}, send, vse) 54 } 55 56 // rowStreamer is used for copying the existing rows of a table 57 // before vreplication begins streaming binlogs. The rowStreamer 58 // responds to a request with the GTID position as of which it 59 // streams the rows of a table. This allows vreplication to synchronize 60 // its events as of the returned GTID before adding the new rows. 61 // For every set of rows sent, the last pk value is also sent. 62 // This allows for the streaming to be resumed based on the last 63 // pk value processed. 64 type rowStreamer struct { 65 ctx context.Context 66 cancel func() 67 68 cp dbconfigs.Connector 69 se *schema.Engine 70 query string 71 lastpk []sqltypes.Value 72 send func(*binlogdatapb.VStreamRowsResponse) error 73 vschema *localVSchema 74 75 plan *Plan 76 pkColumns []int 77 ukColumnNames []string 78 sendQuery string 79 vse *Engine 80 pktsize PacketSizer 81 82 throttleResponseRateLimiter *timer.RateLimiter 83 } 84 85 func newRowStreamer(ctx context.Context, cp dbconfigs.Connector, se *schema.Engine, query string, lastpk []sqltypes.Value, vschema *localVSchema, send func(*binlogdatapb.VStreamRowsResponse) error, vse *Engine) *rowStreamer { 86 ctx, cancel := context.WithCancel(ctx) 87 return &rowStreamer{ 88 ctx: ctx, 89 cancel: cancel, 90 cp: cp, 91 se: se, 92 query: query, 93 lastpk: lastpk, 94 send: send, 95 vschema: vschema, 96 vse: vse, 97 pktsize: DefaultPacketSizer(), 98 99 throttleResponseRateLimiter: timer.NewRateLimiter(rowStreamertHeartbeatInterval), 100 } 101 } 102 103 func (rs *rowStreamer) Cancel() { 104 log.Info("Rowstreamer Cancel() called") 105 rs.cancel() 106 } 107 108 func (rs *rowStreamer) Stream() error { 109 // Ensure sh is Open. If vttablet came up in a non_serving role, 110 // the schema engine may not have been initialized. 111 if err := rs.se.Open(); err != nil { 112 return err 113 } 114 if err := rs.buildPlan(); err != nil { 115 return err 116 } 117 conn, err := snapshotConnect(rs.ctx, rs.cp) 118 if err != nil { 119 return err 120 } 121 defer conn.Close() 122 if _, err := conn.ExecuteFetch("set names binary", 1, false); err != nil { 123 return err 124 } 125 return rs.streamQuery(conn, rs.send) 126 } 127 128 func (rs *rowStreamer) buildPlan() error { 129 // This pre-parsing is required to extract the table name 130 // and create its metadata. 131 sel, fromTable, err := analyzeSelect(rs.query) 132 if err != nil { 133 return err 134 } 135 136 st, err := rs.se.GetTableForPos(fromTable, "") 137 if err != nil { 138 // There is a scenario where vstreamer's table state can be out-of-date, and this happens 139 // with vitess migrations, based on vreplication. 140 // Vitess migrations use an elaborate cut-over flow where tables are swapped away while traffic is 141 // being blocked. The RENAME flow is such that at some point the table is renamed away, leaving a 142 // "puncture"; this is an event the is captured by vstreamer. The completion of the flow fixes the 143 // puncture, and places a new table under the original table's name, but the way it is done does not 144 // cause vstreamer to refresh schema state. 145 // there is therefore a reproducable valid sequence of events where vstreamer thinks a table does not exist, 146 // where it in fact does exist. 147 // For this reason we give vstreamer a "second chance" to review the up-to-date state of the schema. 148 // In the future, we will reduce this operation to reading a single table rather than the entire schema. 149 rs.se.ReloadAt(context.Background(), mysql.Position{}) 150 st, err = rs.se.GetTableForPos(fromTable, "") 151 } 152 if err != nil { 153 return err 154 } 155 ti := &Table{ 156 Name: st.Name, 157 Fields: st.Fields, 158 } 159 // The plan we build is identical to the one for vstreamer. 160 // This is because the row format of a read is identical 161 // to the row format of a binlog event. So, the same 162 // filtering will work. 163 rs.plan, err = buildTablePlan(ti, rs.vschema, rs.query) 164 if err != nil { 165 log.Errorf("%s", err.Error()) 166 return err 167 } 168 169 directives := sel.Comments.Directives() 170 if s, found := directives.GetString("ukColumns", ""); found { 171 rs.ukColumnNames, err = textutil.SplitUnescape(s, ",") 172 if err != nil { 173 return err 174 } 175 } 176 177 rs.pkColumns, err = rs.buildPKColumns(st) 178 if err != nil { 179 return err 180 } 181 rs.sendQuery, err = rs.buildSelect() 182 if err != nil { 183 return err 184 } 185 return err 186 } 187 188 // buildPKColumnsFromUniqueKey assumes a unique key is indicated, 189 func (rs *rowStreamer) buildPKColumnsFromUniqueKey() ([]int, error) { 190 var pkColumns = make([]int, 0) 191 // We wish to utilize a UNIQUE KEY which is not the PRIMARY KEY/ 192 193 for _, colName := range rs.ukColumnNames { 194 index := rs.plan.Table.FindColumn(sqlparser.NewIdentifierCI(colName)) 195 if index < 0 { 196 return pkColumns, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "column %v is listed as unique key, but not present in table %v", colName, rs.plan.Table.Name) 197 } 198 pkColumns = append(pkColumns, index) 199 } 200 return pkColumns, nil 201 } 202 203 func (rs *rowStreamer) buildPKColumns(st *binlogdatapb.MinimalTable) ([]int, error) { 204 if len(rs.ukColumnNames) > 0 { 205 return rs.buildPKColumnsFromUniqueKey() 206 } 207 var pkColumns = make([]int, 0) 208 if len(st.PKColumns) == 0 { 209 // Use a PK equivalent if one exists 210 pkColumns, err := rs.vse.mapPKEquivalentCols(rs.ctx, st) 211 if err == nil && len(pkColumns) != 0 { 212 return pkColumns, nil 213 } 214 215 // Fall back to using every column in the table if there's no PK or PKE 216 pkColumns = make([]int, len(st.Fields)) 217 for i := range st.Fields { 218 pkColumns[i] = i 219 } 220 return pkColumns, nil 221 } 222 for _, pk := range st.PKColumns { 223 if pk >= int64(len(st.Fields)) { 224 return nil, fmt.Errorf("primary key %d refers to non-existent column", pk) 225 } 226 pkColumns = append(pkColumns, int(pk)) 227 } 228 return pkColumns, nil 229 } 230 231 func (rs *rowStreamer) buildSelect() (string, error) { 232 buf := sqlparser.NewTrackedBuffer(nil) 233 // We could have used select *, but being explicit is more predictable. 234 buf.Myprintf("select ") 235 prefix := "" 236 for _, col := range rs.plan.Table.Fields { 237 if rs.plan.isConvertColumnUsingUTF8(col.Name) { 238 buf.Myprintf("%sconvert(%v using utf8mb4) as %v", prefix, sqlparser.NewIdentifierCI(col.Name), sqlparser.NewIdentifierCI(col.Name)) 239 } else if funcExpr := rs.plan.getColumnFuncExpr(col.Name); funcExpr != nil { 240 buf.Myprintf("%s%s as %v", prefix, sqlparser.String(funcExpr), sqlparser.NewIdentifierCI(col.Name)) 241 } else { 242 buf.Myprintf("%s%v", prefix, sqlparser.NewIdentifierCI(col.Name)) 243 } 244 prefix = ", " 245 } 246 buf.Myprintf(" from %v", sqlparser.NewIdentifierCS(rs.plan.Table.Name)) 247 if len(rs.lastpk) != 0 { 248 if len(rs.lastpk) != len(rs.pkColumns) { 249 return "", fmt.Errorf("primary key values don't match length: %v vs %v", rs.lastpk, rs.pkColumns) 250 } 251 buf.WriteString(" where ") 252 prefix := "" 253 // This loop handles the case for composite pks. For example, 254 // if lastpk was (1,2), the where clause would be: 255 // (col1 = 1 and col2 > 2) or (col1 > 1). 256 // A tuple inequality like (col1,col2) > (1,2) ends up 257 // being a full table scan for mysql. 258 for lastcol := len(rs.pkColumns) - 1; lastcol >= 0; lastcol-- { 259 buf.Myprintf("%s(", prefix) 260 prefix = " or " 261 for i, pk := range rs.pkColumns[:lastcol] { 262 buf.Myprintf("%v = ", sqlparser.NewIdentifierCI(rs.plan.Table.Fields[pk].Name)) 263 rs.lastpk[i].EncodeSQL(buf) 264 buf.Myprintf(" and ") 265 } 266 buf.Myprintf("%v > ", sqlparser.NewIdentifierCI(rs.plan.Table.Fields[rs.pkColumns[lastcol]].Name)) 267 rs.lastpk[lastcol].EncodeSQL(buf) 268 buf.Myprintf(")") 269 } 270 } 271 buf.Myprintf(" order by ", sqlparser.NewIdentifierCS(rs.plan.Table.Name)) 272 prefix = "" 273 for _, pk := range rs.pkColumns { 274 buf.Myprintf("%s%v", prefix, sqlparser.NewIdentifierCI(rs.plan.Table.Fields[pk].Name)) 275 prefix = ", " 276 } 277 return buf.String(), nil 278 } 279 280 func (rs *rowStreamer) streamQuery(conn *snapshotConn, send func(*binlogdatapb.VStreamRowsResponse) error) error { 281 282 var sendMu sync.Mutex 283 safeSend := func(r *binlogdatapb.VStreamRowsResponse) error { 284 sendMu.Lock() 285 defer sendMu.Unlock() 286 return send(r) 287 } 288 // Let's wait until MySQL is in good shape to stream rows 289 if err := rs.vse.waitForMySQL(rs.ctx, rs.cp, rs.plan.Table.Name); err != nil { 290 return err 291 } 292 293 log.Infof("Streaming query: %v\n", rs.sendQuery) 294 gtid, rotatedLog, err := conn.streamWithSnapshot(rs.ctx, rs.plan.Table.Name, rs.sendQuery) 295 if rotatedLog { 296 rs.vse.vstreamerFlushedBinlogs.Add(1) 297 } 298 if err != nil { 299 return err 300 } 301 302 // first call the callback with the fields 303 flds, err := conn.Fields() 304 if err != nil { 305 return err 306 } 307 pkfields := make([]*querypb.Field, len(rs.pkColumns)) 308 for i, pk := range rs.pkColumns { 309 pkfields[i] = &querypb.Field{ 310 Name: flds[pk].Name, 311 Type: flds[pk].Type, 312 } 313 } 314 315 charsets := make([]collations.ID, len(flds)) 316 for i, fld := range flds { 317 charsets[i] = collations.ID(fld.Charset) 318 } 319 320 err = safeSend(&binlogdatapb.VStreamRowsResponse{ 321 Fields: rs.plan.fields(), 322 Pkfields: pkfields, 323 Gtid: gtid, 324 }) 325 if err != nil { 326 return fmt.Errorf("stream send error: %v", err) 327 } 328 329 // streamQuery sends heartbeats as long as it operates 330 heartbeatTicker := time.NewTicker(rowStreamertHeartbeatInterval) 331 defer heartbeatTicker.Stop() 332 go func() { 333 for range heartbeatTicker.C { 334 safeSend(&binlogdatapb.VStreamRowsResponse{Heartbeat: true}) 335 } 336 }() 337 338 var response binlogdatapb.VStreamRowsResponse 339 var rows []*querypb.Row 340 var rowCount int 341 var mysqlrow []sqltypes.Value 342 343 filtered := make([]sqltypes.Value, len(rs.plan.ColExprs)) 344 lastpk := make([]sqltypes.Value, len(rs.pkColumns)) 345 byteCount := 0 346 for { 347 if rs.ctx.Err() != nil { 348 log.Infof("Stream ended because of ctx.Done") 349 return fmt.Errorf("stream ended: %v", rs.ctx.Err()) 350 } 351 352 // check throttler. 353 if !rs.vse.throttlerClient.ThrottleCheckOKOrWait(rs.ctx) { 354 rs.throttleResponseRateLimiter.Do(func() error { 355 return safeSend(&binlogdatapb.VStreamRowsResponse{Throttled: true}) 356 }) 357 continue 358 } 359 360 if mysqlrow != nil { 361 mysqlrow = mysqlrow[:0] 362 } 363 mysqlrow, err = conn.FetchNext(mysqlrow) 364 if err != nil { 365 return err 366 } 367 if mysqlrow == nil { 368 break 369 } 370 // Compute lastpk here, because we'll need it 371 // at the end after the loop exits. 372 for i, pk := range rs.pkColumns { 373 lastpk[i] = mysqlrow[pk] 374 } 375 // Reuse the vstreamer's filter. 376 ok, err := rs.plan.filter(mysqlrow, filtered, charsets) 377 if err != nil { 378 return err 379 } 380 if ok { 381 if rowCount >= len(rows) { 382 rows = append(rows, &querypb.Row{}) 383 } 384 byteCount += sqltypes.RowToProto3Inplace(filtered, rows[rowCount]) 385 rowCount++ 386 } 387 388 if rs.pktsize.ShouldSend(byteCount) { 389 response.Rows = rows[:rowCount] 390 response.Lastpk = sqltypes.RowToProto3(lastpk) 391 392 rs.vse.rowStreamerNumRows.Add(int64(len(response.Rows))) 393 rs.vse.rowStreamerNumPackets.Add(int64(1)) 394 startSend := time.Now() 395 err = safeSend(&response) 396 if err != nil { 397 return err 398 } 399 rs.pktsize.Record(byteCount, time.Since(startSend)) 400 rowCount = 0 401 byteCount = 0 402 } 403 } 404 405 if rowCount > 0 { 406 response.Rows = rows[:rowCount] 407 response.Lastpk = sqltypes.RowToProto3(lastpk) 408 409 rs.vse.rowStreamerNumRows.Add(int64(len(response.Rows))) 410 err = safeSend(&response) 411 if err != nil { 412 return err 413 } 414 } 415 416 return nil 417 }