vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/connpool/dbconn.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package connpool
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"vitess.io/vitess/go/pools"
    27  	"vitess.io/vitess/go/vt/dbconfigs"
    28  	"vitess.io/vitess/go/vt/servenv"
    29  	"vitess.io/vitess/go/vt/vterrors"
    30  
    31  	"vitess.io/vitess/go/mysql"
    32  	"vitess.io/vitess/go/sqltypes"
    33  	"vitess.io/vitess/go/sync2"
    34  	"vitess.io/vitess/go/trace"
    35  	"vitess.io/vitess/go/vt/dbconnpool"
    36  	"vitess.io/vitess/go/vt/log"
    37  	"vitess.io/vitess/go/vt/sqlparser"
    38  	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
    39  
    40  	querypb "vitess.io/vitess/go/vt/proto/query"
    41  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    42  )
    43  
    44  // DBConn is a db connection for tabletserver.
    45  // It performs automatic reconnects as needed.
    46  // Its Execute function has a timeout that can kill
    47  // its own queries and the underlying connection.
    48  // It will also trigger a CheckMySQL whenever applicable.
    49  type DBConn struct {
    50  	conn         *dbconnpool.DBConnection
    51  	info         dbconfigs.Connector
    52  	pool         *Pool
    53  	dbaPool      *dbconnpool.ConnectionPool
    54  	stats        *tabletenv.Stats
    55  	current      sync2.AtomicString
    56  	timeCreated  time.Time
    57  	setting      string
    58  	resetSetting string
    59  
    60  	// err will be set if a query is killed through a Kill.
    61  	errmu sync.Mutex
    62  	err   error
    63  }
    64  
    65  // NewDBConn creates a new DBConn. It triggers a CheckMySQL if creation fails.
    66  func NewDBConn(ctx context.Context, cp *Pool, appParams dbconfigs.Connector) (*DBConn, error) {
    67  	start := time.Now()
    68  	defer cp.env.Stats().MySQLTimings.Record("Connect", start)
    69  
    70  	c, err := dbconnpool.NewDBConnection(ctx, appParams)
    71  	if err != nil {
    72  		cp.env.Stats().MySQLTimings.Record("ConnectError", start)
    73  		cp.env.CheckMySQL()
    74  		return nil, err
    75  	}
    76  	return &DBConn{
    77  		conn:        c,
    78  		info:        appParams,
    79  		pool:        cp,
    80  		dbaPool:     cp.dbaPool,
    81  		timeCreated: time.Now(),
    82  		stats:       cp.env.Stats(),
    83  	}, nil
    84  }
    85  
    86  // NewDBConnNoPool creates a new DBConn without a pool.
    87  func NewDBConnNoPool(ctx context.Context, params dbconfigs.Connector, dbaPool *dbconnpool.ConnectionPool, setting *pools.Setting) (*DBConn, error) {
    88  	c, err := dbconnpool.NewDBConnection(ctx, params)
    89  	if err != nil {
    90  		return nil, err
    91  	}
    92  	dbconn := &DBConn{
    93  		conn:        c,
    94  		info:        params,
    95  		dbaPool:     dbaPool,
    96  		pool:        nil,
    97  		timeCreated: time.Now(),
    98  		stats:       tabletenv.NewStats(servenv.NewExporter("Temp", "Tablet")),
    99  	}
   100  	if setting == nil {
   101  		return dbconn, nil
   102  	}
   103  	if err = dbconn.ApplySetting(ctx, setting); err != nil {
   104  		dbconn.Close()
   105  		return nil, err
   106  	}
   107  	return dbconn, nil
   108  }
   109  
   110  // Err returns an error if there was a client initiated error
   111  // like a query kill.
   112  func (dbc *DBConn) Err() error {
   113  	dbc.errmu.Lock()
   114  	defer dbc.errmu.Unlock()
   115  	return dbc.err
   116  }
   117  
   118  // Exec executes the specified query. If there is a connection error, it will reconnect
   119  // and retry. A failed reconnect will trigger a CheckMySQL.
   120  func (dbc *DBConn) Exec(ctx context.Context, query string, maxrows int, wantfields bool) (*sqltypes.Result, error) {
   121  	span, ctx := trace.NewSpan(ctx, "DBConn.Exec")
   122  	defer span.Finish()
   123  
   124  	for attempt := 1; attempt <= 2; attempt++ {
   125  		r, err := dbc.execOnce(ctx, query, maxrows, wantfields)
   126  		switch {
   127  		case err == nil:
   128  			// Success.
   129  			return r, nil
   130  		case mysql.IsConnLostDuringQuery(err):
   131  			// Query probably killed. Don't retry.
   132  			return nil, err
   133  		case !mysql.IsConnErr(err):
   134  			// Not a connection error. Don't retry.
   135  			return nil, err
   136  		case attempt == 2:
   137  			// Reached the retry limit.
   138  			return nil, err
   139  		}
   140  
   141  		// Connection error. Retry if context has not expired.
   142  		select {
   143  		case <-ctx.Done():
   144  			return nil, err
   145  		default:
   146  		}
   147  
   148  		if reconnectErr := dbc.reconnect(ctx); reconnectErr != nil {
   149  			dbc.pool.env.CheckMySQL()
   150  			// Return the error of the reconnect and not the original connection error.
   151  			return nil, reconnectErr
   152  		}
   153  
   154  		// Reconnect succeeded. Retry query at second attempt.
   155  	}
   156  	panic("unreachable")
   157  }
   158  
   159  func (dbc *DBConn) execOnce(ctx context.Context, query string, maxrows int, wantfields bool) (*sqltypes.Result, error) {
   160  	dbc.current.Set(query)
   161  	defer dbc.current.Set("")
   162  
   163  	// Check if the context is already past its deadline before
   164  	// trying to execute the query.
   165  	select {
   166  	case <-ctx.Done():
   167  		return nil, fmt.Errorf("%v before execution started", ctx.Err())
   168  	default:
   169  	}
   170  
   171  	defer dbc.stats.MySQLTimings.Record("Exec", time.Now())
   172  
   173  	done, wg := dbc.setDeadline(ctx)
   174  	qr, err := dbc.conn.ExecuteFetch(query, maxrows, wantfields)
   175  
   176  	if done != nil {
   177  		close(done)
   178  		wg.Wait()
   179  	}
   180  	if dbcerr := dbc.Err(); dbcerr != nil {
   181  		return nil, dbcerr
   182  	}
   183  	return qr, err
   184  }
   185  
   186  // ExecOnce executes the specified query, but does not retry on connection errors.
   187  func (dbc *DBConn) ExecOnce(ctx context.Context, query string, maxrows int, wantfields bool) (*sqltypes.Result, error) {
   188  	return dbc.execOnce(ctx, query, maxrows, wantfields)
   189  }
   190  
   191  // FetchNext returns the next result set.
   192  func (dbc *DBConn) FetchNext(ctx context.Context, maxrows int, wantfields bool) (*sqltypes.Result, error) {
   193  	// Check if the context is already past its deadline before
   194  	// trying to fetch the next result.
   195  	select {
   196  	case <-ctx.Done():
   197  		return nil, fmt.Errorf("%v before reading next result set", ctx.Err())
   198  	default:
   199  	}
   200  	res, _, _, err := dbc.conn.ReadQueryResult(maxrows, wantfields)
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	return res, err
   205  
   206  }
   207  
   208  // Stream executes the query and streams the results.
   209  func (dbc *DBConn) Stream(ctx context.Context, query string, callback func(*sqltypes.Result) error, alloc func() *sqltypes.Result, streamBufferSize int, includedFields querypb.ExecuteOptions_IncludedFields) error {
   210  	span, ctx := trace.NewSpan(ctx, "DBConn.Stream")
   211  	trace.AnnotateSQL(span, sqlparser.Preview(query))
   212  	defer span.Finish()
   213  
   214  	resultSent := false
   215  	for attempt := 1; attempt <= 2; attempt++ {
   216  		err := dbc.streamOnce(
   217  			ctx,
   218  			query,
   219  			func(r *sqltypes.Result) error {
   220  				if !resultSent {
   221  					resultSent = true
   222  					r = r.StripMetadata(includedFields)
   223  				}
   224  				return callback(r)
   225  			},
   226  			alloc,
   227  			streamBufferSize,
   228  		)
   229  		switch {
   230  		case err == nil:
   231  			// Success.
   232  			return nil
   233  		case mysql.IsConnLostDuringQuery(err):
   234  			// Query probably killed. Don't retry.
   235  			return err
   236  		case !mysql.IsConnErr(err):
   237  			// Not a connection error. Don't retry.
   238  			return err
   239  		case attempt == 2:
   240  			// Reached the retry limit.
   241  			return err
   242  		case resultSent:
   243  			// Don't retry if streaming has started.
   244  			return err
   245  		}
   246  
   247  		// Connection error. Retry if context has not expired.
   248  		select {
   249  		case <-ctx.Done():
   250  			return err
   251  		default:
   252  		}
   253  		if reconnectErr := dbc.reconnect(ctx); reconnectErr != nil {
   254  			dbc.pool.env.CheckMySQL()
   255  			// Return the error of the reconnect and not the original connection error.
   256  			return reconnectErr
   257  		}
   258  	}
   259  	panic("unreachable")
   260  }
   261  
   262  func (dbc *DBConn) streamOnce(ctx context.Context, query string, callback func(*sqltypes.Result) error, alloc func() *sqltypes.Result, streamBufferSize int) error {
   263  	defer dbc.stats.MySQLTimings.Record("ExecStream", time.Now())
   264  
   265  	dbc.current.Set(query)
   266  	defer dbc.current.Set("")
   267  
   268  	done, wg := dbc.setDeadline(ctx)
   269  	err := dbc.conn.ExecuteStreamFetch(query, callback, alloc, streamBufferSize)
   270  
   271  	if done != nil {
   272  		close(done)
   273  		wg.Wait()
   274  	}
   275  	if dbcerr := dbc.Err(); dbcerr != nil {
   276  		return dbcerr
   277  	}
   278  	return err
   279  }
   280  
   281  // StreamOnce executes the query and streams the results. But, does not retry on connection errors.
   282  func (dbc *DBConn) StreamOnce(ctx context.Context, query string, callback func(*sqltypes.Result) error, alloc func() *sqltypes.Result, streamBufferSize int, includedFields querypb.ExecuteOptions_IncludedFields) error {
   283  	resultSent := false
   284  	return dbc.streamOnce(
   285  		ctx,
   286  		query,
   287  		func(r *sqltypes.Result) error {
   288  			if !resultSent {
   289  				resultSent = true
   290  				r = r.StripMetadata(includedFields)
   291  			}
   292  			return callback(r)
   293  		},
   294  		alloc,
   295  		streamBufferSize,
   296  	)
   297  }
   298  
   299  var (
   300  	getModeSQL    = "select @@global.sql_mode"
   301  	getAutocommit = "select @@autocommit"
   302  	getAutoIsNull = "select @@sql_auto_is_null"
   303  )
   304  
   305  // VerifyMode is a helper method to verify mysql is running with
   306  // sql_mode = STRICT_TRANS_TABLES or STRICT_ALL_TABLES and autocommit=ON.
   307  func (dbc *DBConn) VerifyMode(strictTransTables bool) error {
   308  	if strictTransTables {
   309  		qr, err := dbc.conn.ExecuteFetch(getModeSQL, 2, false)
   310  		if err != nil {
   311  			return err
   312  		}
   313  		if len(qr.Rows) != 1 {
   314  			return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "incorrect rowcount received for %s: %d", getModeSQL, len(qr.Rows))
   315  		}
   316  		sqlMode := qr.Rows[0][0].ToString()
   317  		if !(strings.Contains(sqlMode, "STRICT_TRANS_TABLES") || strings.Contains(sqlMode, "STRICT_ALL_TABLES")) {
   318  			return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "require sql_mode to be STRICT_TRANS_TABLES or STRICT_ALL_TABLES: got '%s'", qr.Rows[0][0].ToString())
   319  		}
   320  	}
   321  	qr, err := dbc.conn.ExecuteFetch(getAutocommit, 2, false)
   322  	if err != nil {
   323  		return err
   324  	}
   325  	if len(qr.Rows) != 1 {
   326  		return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "incorrect rowcount received for %s: %d", getAutocommit, len(qr.Rows))
   327  	}
   328  	if !strings.Contains(qr.Rows[0][0].ToString(), "1") {
   329  		return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "require autocommit to be 1: got %s", qr.Rows[0][0].ToString())
   330  	}
   331  	qr, err = dbc.conn.ExecuteFetch(getAutoIsNull, 2, false)
   332  	if err != nil {
   333  		return err
   334  	}
   335  	if len(qr.Rows) != 1 {
   336  		return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "incorrect rowcount received for %s: %d", getAutoIsNull, len(qr.Rows))
   337  	}
   338  	if !strings.Contains(qr.Rows[0][0].ToString(), "0") {
   339  		return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "require sql_auto_is_null to be 0: got %s", qr.Rows[0][0].ToString())
   340  	}
   341  	return nil
   342  }
   343  
   344  // Close closes the DBConn.
   345  func (dbc *DBConn) Close() {
   346  	dbc.conn.Close()
   347  }
   348  
   349  // ApplySetting implements the pools.Resource interface.
   350  func (dbc *DBConn) ApplySetting(ctx context.Context, setting *pools.Setting) error {
   351  	query := setting.GetQuery()
   352  	if _, err := dbc.execOnce(ctx, query, 1, false); err != nil {
   353  		return err
   354  	}
   355  	dbc.setting = query
   356  	dbc.resetSetting = setting.GetResetQuery()
   357  	return nil
   358  }
   359  
   360  // IsSettingApplied implements the pools.Resource interface.
   361  func (dbc *DBConn) IsSettingApplied() bool {
   362  	return dbc.setting != ""
   363  }
   364  
   365  // IsSameSetting implements the pools.Resource interface.
   366  func (dbc *DBConn) IsSameSetting(setting string) bool {
   367  	return strings.EqualFold(setting, dbc.setting)
   368  }
   369  
   370  // ResetSetting implements the pools.Resource interface.
   371  func (dbc *DBConn) ResetSetting(ctx context.Context) error {
   372  	if _, err := dbc.execOnce(ctx, dbc.resetSetting, 1, false); err != nil {
   373  		return err
   374  	}
   375  	dbc.setting = ""
   376  	dbc.resetSetting = ""
   377  	return nil
   378  }
   379  
   380  var _ pools.Resource = (*DBConn)(nil)
   381  
   382  // IsClosed returns true if DBConn is closed.
   383  func (dbc *DBConn) IsClosed() bool {
   384  	return dbc.conn.IsClosed()
   385  }
   386  
   387  // Expired returns whether a connection has passed its lifetime
   388  func (dbc *DBConn) Expired(lifetimeTimeout time.Duration) bool {
   389  	return lifetimeTimeout > 0 && time.Until(dbc.timeCreated.Add(lifetimeTimeout)) < 0
   390  }
   391  
   392  // Recycle returns the DBConn to the pool.
   393  func (dbc *DBConn) Recycle() {
   394  	switch {
   395  	case dbc.pool == nil:
   396  		dbc.Close()
   397  	case dbc.conn.IsClosed():
   398  		dbc.pool.Put(nil)
   399  	default:
   400  		dbc.pool.Put(dbc)
   401  	}
   402  }
   403  
   404  // Taint unregister connection from original pool and taints the connection.
   405  func (dbc *DBConn) Taint() {
   406  	if dbc.pool == nil {
   407  		return
   408  	}
   409  	dbc.pool.Put(nil)
   410  	dbc.pool = nil
   411  }
   412  
   413  // Kill kills the currently executing query both on MySQL side
   414  // and on the connection side. If no query is executing, it's a no-op.
   415  // Kill will also not kill a query more than once.
   416  func (dbc *DBConn) Kill(reason string, elapsed time.Duration) error {
   417  	dbc.stats.KillCounters.Add("Queries", 1)
   418  	log.Infof("Due to %s, elapsed time: %v, killing query ID %v %s", reason, elapsed, dbc.conn.ID(), dbc.CurrentForLogging())
   419  
   420  	// Client side action. Set error and close connection.
   421  	dbc.errmu.Lock()
   422  	dbc.err = vterrors.Errorf(vtrpcpb.Code_CANCELED, "(errno 2013) due to %s, elapsed time: %v, killing query ID %v", reason, elapsed, dbc.conn.ID())
   423  	dbc.errmu.Unlock()
   424  	dbc.conn.Close()
   425  
   426  	// Server side action. Kill the session.
   427  	killConn, err := dbc.dbaPool.Get(context.TODO())
   428  	if err != nil {
   429  		log.Warningf("Failed to get conn from dba pool: %v", err)
   430  		return err
   431  	}
   432  	defer killConn.Recycle()
   433  	sql := fmt.Sprintf("kill %d", dbc.conn.ID())
   434  	_, err = killConn.ExecuteFetch(sql, 10000, false)
   435  	if err != nil {
   436  		log.Errorf("Could not kill query ID %v %s: %v", dbc.conn.ID(),
   437  			dbc.CurrentForLogging(), err)
   438  		return err
   439  	}
   440  	return nil
   441  }
   442  
   443  // Current returns the currently executing query.
   444  func (dbc *DBConn) Current() string {
   445  	return dbc.current.Get()
   446  }
   447  
   448  // ID returns the connection id.
   449  func (dbc *DBConn) ID() int64 {
   450  	return dbc.conn.ID()
   451  }
   452  
   453  // BaseShowTables returns a query that shows tables
   454  func (dbc *DBConn) BaseShowTables() string {
   455  	return dbc.conn.BaseShowTables()
   456  }
   457  
   458  // BaseShowTablesWithSizes returns a query that shows tables and their sizes
   459  func (dbc *DBConn) BaseShowTablesWithSizes() string {
   460  	return dbc.conn.BaseShowTablesWithSizes()
   461  }
   462  
   463  func (dbc *DBConn) reconnect(ctx context.Context) error {
   464  	dbc.conn.Close()
   465  	// Reuse MySQLTimings from dbc.conn.
   466  	newConn, err := dbconnpool.NewDBConnection(ctx, dbc.info)
   467  	if err != nil {
   468  		return err
   469  	}
   470  	dbc.conn = newConn
   471  	if dbc.IsSettingApplied() {
   472  		err = dbc.applySameSetting(ctx)
   473  		if err != nil {
   474  			return err
   475  		}
   476  	}
   477  	dbc.errmu.Lock()
   478  	dbc.err = nil
   479  	dbc.errmu.Unlock()
   480  	return nil
   481  }
   482  
   483  // setDeadline starts a goroutine that will kill the currently executing query
   484  // if the deadline is exceeded. It returns a channel and a waitgroup. After the
   485  // query is done executing, the caller is required to close the done channel
   486  // and wait for the waitgroup to make sure that the necessary cleanup is done.
   487  func (dbc *DBConn) setDeadline(ctx context.Context) (chan bool, *sync.WaitGroup) {
   488  	if ctx.Done() == nil {
   489  		return nil, nil
   490  	}
   491  	done := make(chan bool)
   492  	var wg sync.WaitGroup
   493  	wg.Add(1)
   494  	go func() {
   495  		defer wg.Done()
   496  		startTime := time.Now()
   497  		select {
   498  		case <-ctx.Done():
   499  			dbc.Kill(ctx.Err().Error(), time.Since(startTime))
   500  		case <-done:
   501  			return
   502  		}
   503  		elapsed := time.Since(startTime)
   504  
   505  		// Give 2x the elapsed time and some buffer as grace period
   506  		// for the query to get killed.
   507  		tmr2 := time.NewTimer(2*elapsed + 5*time.Second)
   508  		defer tmr2.Stop()
   509  		select {
   510  		case <-tmr2.C:
   511  			dbc.stats.InternalErrors.Add("HungQuery", 1)
   512  			log.Warningf("Query may be hung: %s", dbc.CurrentForLogging())
   513  		case <-done:
   514  			return
   515  		}
   516  		<-done
   517  		log.Warningf("Hung query returned")
   518  	}()
   519  	return done, &wg
   520  }
   521  
   522  // CurrentForLogging applies transformations to the query making it suitable to log.
   523  // It applies sanitization rules based on tablet settings and limits the max length of
   524  // queries.
   525  func (dbc *DBConn) CurrentForLogging() string {
   526  	var queryToLog string
   527  	if dbc.pool != nil && dbc.pool.env != nil && dbc.pool.env.Config() != nil && !dbc.pool.env.Config().SanitizeLogMessages {
   528  		queryToLog = dbc.Current()
   529  	} else {
   530  		queryToLog, _ = sqlparser.RedactSQLQuery(dbc.Current())
   531  	}
   532  	return sqlparser.TruncateForLog(queryToLog)
   533  }
   534  
   535  func (dbc *DBConn) applySameSetting(ctx context.Context) (err error) {
   536  	_, err = dbc.execOnce(ctx, dbc.setting, 1, false)
   537  	return
   538  }