vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/connpool/dbconn.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package connpool 18 19 import ( 20 "context" 21 "fmt" 22 "strings" 23 "sync" 24 "time" 25 26 "vitess.io/vitess/go/pools" 27 "vitess.io/vitess/go/vt/dbconfigs" 28 "vitess.io/vitess/go/vt/servenv" 29 "vitess.io/vitess/go/vt/vterrors" 30 31 "vitess.io/vitess/go/mysql" 32 "vitess.io/vitess/go/sqltypes" 33 "vitess.io/vitess/go/sync2" 34 "vitess.io/vitess/go/trace" 35 "vitess.io/vitess/go/vt/dbconnpool" 36 "vitess.io/vitess/go/vt/log" 37 "vitess.io/vitess/go/vt/sqlparser" 38 "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" 39 40 querypb "vitess.io/vitess/go/vt/proto/query" 41 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 42 ) 43 44 // DBConn is a db connection for tabletserver. 45 // It performs automatic reconnects as needed. 46 // Its Execute function has a timeout that can kill 47 // its own queries and the underlying connection. 48 // It will also trigger a CheckMySQL whenever applicable. 49 type DBConn struct { 50 conn *dbconnpool.DBConnection 51 info dbconfigs.Connector 52 pool *Pool 53 dbaPool *dbconnpool.ConnectionPool 54 stats *tabletenv.Stats 55 current sync2.AtomicString 56 timeCreated time.Time 57 setting string 58 resetSetting string 59 60 // err will be set if a query is killed through a Kill. 61 errmu sync.Mutex 62 err error 63 } 64 65 // NewDBConn creates a new DBConn. It triggers a CheckMySQL if creation fails. 66 func NewDBConn(ctx context.Context, cp *Pool, appParams dbconfigs.Connector) (*DBConn, error) { 67 start := time.Now() 68 defer cp.env.Stats().MySQLTimings.Record("Connect", start) 69 70 c, err := dbconnpool.NewDBConnection(ctx, appParams) 71 if err != nil { 72 cp.env.Stats().MySQLTimings.Record("ConnectError", start) 73 cp.env.CheckMySQL() 74 return nil, err 75 } 76 return &DBConn{ 77 conn: c, 78 info: appParams, 79 pool: cp, 80 dbaPool: cp.dbaPool, 81 timeCreated: time.Now(), 82 stats: cp.env.Stats(), 83 }, nil 84 } 85 86 // NewDBConnNoPool creates a new DBConn without a pool. 87 func NewDBConnNoPool(ctx context.Context, params dbconfigs.Connector, dbaPool *dbconnpool.ConnectionPool, setting *pools.Setting) (*DBConn, error) { 88 c, err := dbconnpool.NewDBConnection(ctx, params) 89 if err != nil { 90 return nil, err 91 } 92 dbconn := &DBConn{ 93 conn: c, 94 info: params, 95 dbaPool: dbaPool, 96 pool: nil, 97 timeCreated: time.Now(), 98 stats: tabletenv.NewStats(servenv.NewExporter("Temp", "Tablet")), 99 } 100 if setting == nil { 101 return dbconn, nil 102 } 103 if err = dbconn.ApplySetting(ctx, setting); err != nil { 104 dbconn.Close() 105 return nil, err 106 } 107 return dbconn, nil 108 } 109 110 // Err returns an error if there was a client initiated error 111 // like a query kill. 112 func (dbc *DBConn) Err() error { 113 dbc.errmu.Lock() 114 defer dbc.errmu.Unlock() 115 return dbc.err 116 } 117 118 // Exec executes the specified query. If there is a connection error, it will reconnect 119 // and retry. A failed reconnect will trigger a CheckMySQL. 120 func (dbc *DBConn) Exec(ctx context.Context, query string, maxrows int, wantfields bool) (*sqltypes.Result, error) { 121 span, ctx := trace.NewSpan(ctx, "DBConn.Exec") 122 defer span.Finish() 123 124 for attempt := 1; attempt <= 2; attempt++ { 125 r, err := dbc.execOnce(ctx, query, maxrows, wantfields) 126 switch { 127 case err == nil: 128 // Success. 129 return r, nil 130 case mysql.IsConnLostDuringQuery(err): 131 // Query probably killed. Don't retry. 132 return nil, err 133 case !mysql.IsConnErr(err): 134 // Not a connection error. Don't retry. 135 return nil, err 136 case attempt == 2: 137 // Reached the retry limit. 138 return nil, err 139 } 140 141 // Connection error. Retry if context has not expired. 142 select { 143 case <-ctx.Done(): 144 return nil, err 145 default: 146 } 147 148 if reconnectErr := dbc.reconnect(ctx); reconnectErr != nil { 149 dbc.pool.env.CheckMySQL() 150 // Return the error of the reconnect and not the original connection error. 151 return nil, reconnectErr 152 } 153 154 // Reconnect succeeded. Retry query at second attempt. 155 } 156 panic("unreachable") 157 } 158 159 func (dbc *DBConn) execOnce(ctx context.Context, query string, maxrows int, wantfields bool) (*sqltypes.Result, error) { 160 dbc.current.Set(query) 161 defer dbc.current.Set("") 162 163 // Check if the context is already past its deadline before 164 // trying to execute the query. 165 select { 166 case <-ctx.Done(): 167 return nil, fmt.Errorf("%v before execution started", ctx.Err()) 168 default: 169 } 170 171 defer dbc.stats.MySQLTimings.Record("Exec", time.Now()) 172 173 done, wg := dbc.setDeadline(ctx) 174 qr, err := dbc.conn.ExecuteFetch(query, maxrows, wantfields) 175 176 if done != nil { 177 close(done) 178 wg.Wait() 179 } 180 if dbcerr := dbc.Err(); dbcerr != nil { 181 return nil, dbcerr 182 } 183 return qr, err 184 } 185 186 // ExecOnce executes the specified query, but does not retry on connection errors. 187 func (dbc *DBConn) ExecOnce(ctx context.Context, query string, maxrows int, wantfields bool) (*sqltypes.Result, error) { 188 return dbc.execOnce(ctx, query, maxrows, wantfields) 189 } 190 191 // FetchNext returns the next result set. 192 func (dbc *DBConn) FetchNext(ctx context.Context, maxrows int, wantfields bool) (*sqltypes.Result, error) { 193 // Check if the context is already past its deadline before 194 // trying to fetch the next result. 195 select { 196 case <-ctx.Done(): 197 return nil, fmt.Errorf("%v before reading next result set", ctx.Err()) 198 default: 199 } 200 res, _, _, err := dbc.conn.ReadQueryResult(maxrows, wantfields) 201 if err != nil { 202 return nil, err 203 } 204 return res, err 205 206 } 207 208 // Stream executes the query and streams the results. 209 func (dbc *DBConn) Stream(ctx context.Context, query string, callback func(*sqltypes.Result) error, alloc func() *sqltypes.Result, streamBufferSize int, includedFields querypb.ExecuteOptions_IncludedFields) error { 210 span, ctx := trace.NewSpan(ctx, "DBConn.Stream") 211 trace.AnnotateSQL(span, sqlparser.Preview(query)) 212 defer span.Finish() 213 214 resultSent := false 215 for attempt := 1; attempt <= 2; attempt++ { 216 err := dbc.streamOnce( 217 ctx, 218 query, 219 func(r *sqltypes.Result) error { 220 if !resultSent { 221 resultSent = true 222 r = r.StripMetadata(includedFields) 223 } 224 return callback(r) 225 }, 226 alloc, 227 streamBufferSize, 228 ) 229 switch { 230 case err == nil: 231 // Success. 232 return nil 233 case mysql.IsConnLostDuringQuery(err): 234 // Query probably killed. Don't retry. 235 return err 236 case !mysql.IsConnErr(err): 237 // Not a connection error. Don't retry. 238 return err 239 case attempt == 2: 240 // Reached the retry limit. 241 return err 242 case resultSent: 243 // Don't retry if streaming has started. 244 return err 245 } 246 247 // Connection error. Retry if context has not expired. 248 select { 249 case <-ctx.Done(): 250 return err 251 default: 252 } 253 if reconnectErr := dbc.reconnect(ctx); reconnectErr != nil { 254 dbc.pool.env.CheckMySQL() 255 // Return the error of the reconnect and not the original connection error. 256 return reconnectErr 257 } 258 } 259 panic("unreachable") 260 } 261 262 func (dbc *DBConn) streamOnce(ctx context.Context, query string, callback func(*sqltypes.Result) error, alloc func() *sqltypes.Result, streamBufferSize int) error { 263 defer dbc.stats.MySQLTimings.Record("ExecStream", time.Now()) 264 265 dbc.current.Set(query) 266 defer dbc.current.Set("") 267 268 done, wg := dbc.setDeadline(ctx) 269 err := dbc.conn.ExecuteStreamFetch(query, callback, alloc, streamBufferSize) 270 271 if done != nil { 272 close(done) 273 wg.Wait() 274 } 275 if dbcerr := dbc.Err(); dbcerr != nil { 276 return dbcerr 277 } 278 return err 279 } 280 281 // StreamOnce executes the query and streams the results. But, does not retry on connection errors. 282 func (dbc *DBConn) StreamOnce(ctx context.Context, query string, callback func(*sqltypes.Result) error, alloc func() *sqltypes.Result, streamBufferSize int, includedFields querypb.ExecuteOptions_IncludedFields) error { 283 resultSent := false 284 return dbc.streamOnce( 285 ctx, 286 query, 287 func(r *sqltypes.Result) error { 288 if !resultSent { 289 resultSent = true 290 r = r.StripMetadata(includedFields) 291 } 292 return callback(r) 293 }, 294 alloc, 295 streamBufferSize, 296 ) 297 } 298 299 var ( 300 getModeSQL = "select @@global.sql_mode" 301 getAutocommit = "select @@autocommit" 302 getAutoIsNull = "select @@sql_auto_is_null" 303 ) 304 305 // VerifyMode is a helper method to verify mysql is running with 306 // sql_mode = STRICT_TRANS_TABLES or STRICT_ALL_TABLES and autocommit=ON. 307 func (dbc *DBConn) VerifyMode(strictTransTables bool) error { 308 if strictTransTables { 309 qr, err := dbc.conn.ExecuteFetch(getModeSQL, 2, false) 310 if err != nil { 311 return err 312 } 313 if len(qr.Rows) != 1 { 314 return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "incorrect rowcount received for %s: %d", getModeSQL, len(qr.Rows)) 315 } 316 sqlMode := qr.Rows[0][0].ToString() 317 if !(strings.Contains(sqlMode, "STRICT_TRANS_TABLES") || strings.Contains(sqlMode, "STRICT_ALL_TABLES")) { 318 return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "require sql_mode to be STRICT_TRANS_TABLES or STRICT_ALL_TABLES: got '%s'", qr.Rows[0][0].ToString()) 319 } 320 } 321 qr, err := dbc.conn.ExecuteFetch(getAutocommit, 2, false) 322 if err != nil { 323 return err 324 } 325 if len(qr.Rows) != 1 { 326 return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "incorrect rowcount received for %s: %d", getAutocommit, len(qr.Rows)) 327 } 328 if !strings.Contains(qr.Rows[0][0].ToString(), "1") { 329 return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "require autocommit to be 1: got %s", qr.Rows[0][0].ToString()) 330 } 331 qr, err = dbc.conn.ExecuteFetch(getAutoIsNull, 2, false) 332 if err != nil { 333 return err 334 } 335 if len(qr.Rows) != 1 { 336 return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "incorrect rowcount received for %s: %d", getAutoIsNull, len(qr.Rows)) 337 } 338 if !strings.Contains(qr.Rows[0][0].ToString(), "0") { 339 return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "require sql_auto_is_null to be 0: got %s", qr.Rows[0][0].ToString()) 340 } 341 return nil 342 } 343 344 // Close closes the DBConn. 345 func (dbc *DBConn) Close() { 346 dbc.conn.Close() 347 } 348 349 // ApplySetting implements the pools.Resource interface. 350 func (dbc *DBConn) ApplySetting(ctx context.Context, setting *pools.Setting) error { 351 query := setting.GetQuery() 352 if _, err := dbc.execOnce(ctx, query, 1, false); err != nil { 353 return err 354 } 355 dbc.setting = query 356 dbc.resetSetting = setting.GetResetQuery() 357 return nil 358 } 359 360 // IsSettingApplied implements the pools.Resource interface. 361 func (dbc *DBConn) IsSettingApplied() bool { 362 return dbc.setting != "" 363 } 364 365 // IsSameSetting implements the pools.Resource interface. 366 func (dbc *DBConn) IsSameSetting(setting string) bool { 367 return strings.EqualFold(setting, dbc.setting) 368 } 369 370 // ResetSetting implements the pools.Resource interface. 371 func (dbc *DBConn) ResetSetting(ctx context.Context) error { 372 if _, err := dbc.execOnce(ctx, dbc.resetSetting, 1, false); err != nil { 373 return err 374 } 375 dbc.setting = "" 376 dbc.resetSetting = "" 377 return nil 378 } 379 380 var _ pools.Resource = (*DBConn)(nil) 381 382 // IsClosed returns true if DBConn is closed. 383 func (dbc *DBConn) IsClosed() bool { 384 return dbc.conn.IsClosed() 385 } 386 387 // Expired returns whether a connection has passed its lifetime 388 func (dbc *DBConn) Expired(lifetimeTimeout time.Duration) bool { 389 return lifetimeTimeout > 0 && time.Until(dbc.timeCreated.Add(lifetimeTimeout)) < 0 390 } 391 392 // Recycle returns the DBConn to the pool. 393 func (dbc *DBConn) Recycle() { 394 switch { 395 case dbc.pool == nil: 396 dbc.Close() 397 case dbc.conn.IsClosed(): 398 dbc.pool.Put(nil) 399 default: 400 dbc.pool.Put(dbc) 401 } 402 } 403 404 // Taint unregister connection from original pool and taints the connection. 405 func (dbc *DBConn) Taint() { 406 if dbc.pool == nil { 407 return 408 } 409 dbc.pool.Put(nil) 410 dbc.pool = nil 411 } 412 413 // Kill kills the currently executing query both on MySQL side 414 // and on the connection side. If no query is executing, it's a no-op. 415 // Kill will also not kill a query more than once. 416 func (dbc *DBConn) Kill(reason string, elapsed time.Duration) error { 417 dbc.stats.KillCounters.Add("Queries", 1) 418 log.Infof("Due to %s, elapsed time: %v, killing query ID %v %s", reason, elapsed, dbc.conn.ID(), dbc.CurrentForLogging()) 419 420 // Client side action. Set error and close connection. 421 dbc.errmu.Lock() 422 dbc.err = vterrors.Errorf(vtrpcpb.Code_CANCELED, "(errno 2013) due to %s, elapsed time: %v, killing query ID %v", reason, elapsed, dbc.conn.ID()) 423 dbc.errmu.Unlock() 424 dbc.conn.Close() 425 426 // Server side action. Kill the session. 427 killConn, err := dbc.dbaPool.Get(context.TODO()) 428 if err != nil { 429 log.Warningf("Failed to get conn from dba pool: %v", err) 430 return err 431 } 432 defer killConn.Recycle() 433 sql := fmt.Sprintf("kill %d", dbc.conn.ID()) 434 _, err = killConn.ExecuteFetch(sql, 10000, false) 435 if err != nil { 436 log.Errorf("Could not kill query ID %v %s: %v", dbc.conn.ID(), 437 dbc.CurrentForLogging(), err) 438 return err 439 } 440 return nil 441 } 442 443 // Current returns the currently executing query. 444 func (dbc *DBConn) Current() string { 445 return dbc.current.Get() 446 } 447 448 // ID returns the connection id. 449 func (dbc *DBConn) ID() int64 { 450 return dbc.conn.ID() 451 } 452 453 // BaseShowTables returns a query that shows tables 454 func (dbc *DBConn) BaseShowTables() string { 455 return dbc.conn.BaseShowTables() 456 } 457 458 // BaseShowTablesWithSizes returns a query that shows tables and their sizes 459 func (dbc *DBConn) BaseShowTablesWithSizes() string { 460 return dbc.conn.BaseShowTablesWithSizes() 461 } 462 463 func (dbc *DBConn) reconnect(ctx context.Context) error { 464 dbc.conn.Close() 465 // Reuse MySQLTimings from dbc.conn. 466 newConn, err := dbconnpool.NewDBConnection(ctx, dbc.info) 467 if err != nil { 468 return err 469 } 470 dbc.conn = newConn 471 if dbc.IsSettingApplied() { 472 err = dbc.applySameSetting(ctx) 473 if err != nil { 474 return err 475 } 476 } 477 dbc.errmu.Lock() 478 dbc.err = nil 479 dbc.errmu.Unlock() 480 return nil 481 } 482 483 // setDeadline starts a goroutine that will kill the currently executing query 484 // if the deadline is exceeded. It returns a channel and a waitgroup. After the 485 // query is done executing, the caller is required to close the done channel 486 // and wait for the waitgroup to make sure that the necessary cleanup is done. 487 func (dbc *DBConn) setDeadline(ctx context.Context) (chan bool, *sync.WaitGroup) { 488 if ctx.Done() == nil { 489 return nil, nil 490 } 491 done := make(chan bool) 492 var wg sync.WaitGroup 493 wg.Add(1) 494 go func() { 495 defer wg.Done() 496 startTime := time.Now() 497 select { 498 case <-ctx.Done(): 499 dbc.Kill(ctx.Err().Error(), time.Since(startTime)) 500 case <-done: 501 return 502 } 503 elapsed := time.Since(startTime) 504 505 // Give 2x the elapsed time and some buffer as grace period 506 // for the query to get killed. 507 tmr2 := time.NewTimer(2*elapsed + 5*time.Second) 508 defer tmr2.Stop() 509 select { 510 case <-tmr2.C: 511 dbc.stats.InternalErrors.Add("HungQuery", 1) 512 log.Warningf("Query may be hung: %s", dbc.CurrentForLogging()) 513 case <-done: 514 return 515 } 516 <-done 517 log.Warningf("Hung query returned") 518 }() 519 return done, &wg 520 } 521 522 // CurrentForLogging applies transformations to the query making it suitable to log. 523 // It applies sanitization rules based on tablet settings and limits the max length of 524 // queries. 525 func (dbc *DBConn) CurrentForLogging() string { 526 var queryToLog string 527 if dbc.pool != nil && dbc.pool.env != nil && dbc.pool.env.Config() != nil && !dbc.pool.env.Config().SanitizeLogMessages { 528 queryToLog = dbc.Current() 529 } else { 530 queryToLog, _ = sqlparser.RedactSQLQuery(dbc.Current()) 531 } 532 return sqlparser.TruncateForLog(queryToLog) 533 } 534 535 func (dbc *DBConn) applySameSetting(ctx context.Context) (err error) { 536 _, err = dbc.execOnce(ctx, dbc.setting, 1, false) 537 return 538 }