vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/txserializer/tx_serializer.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package txserializer provides the vttablet hot row protection.
    18  // See the TxSerializer struct for details.
    19  package txserializer
    20  
    21  import (
    22  	"fmt"
    23  	"net/http"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"context"
    29  
    30  	"vitess.io/vitess/go/acl"
    31  	"vitess.io/vitess/go/stats"
    32  	"vitess.io/vitess/go/streamlog"
    33  	"vitess.io/vitess/go/sync2"
    34  	"vitess.io/vitess/go/vt/logutil"
    35  	"vitess.io/vitess/go/vt/vterrors"
    36  	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
    37  
    38  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    39  )
    40  
// TxSerializer serializes incoming transactions which target the same row range
// i.e. table name and WHERE clause are identical.
// Additional transactions are queued and woken up in arrival order.
//
// This implementation has some parallels to the sync2.Consolidator class.
// However, there are many substantial differences:
//   - Results are not shared between queued transactions.
//   - Only one waiting transaction and not all are notified when the current one
//     has finished.
//   - Waiting transactions are woken up in FIFO order.
//   - Waiting transactions are unblocked if their context is done.
//   - Both the local queue (per row range) and global queue (whole process) are
//     limited to avoid that queued transactions can consume the full capacity
//     of vttablet. This is important if the capacity is finite. For example, the
//     number of RPCs in flight could be limited by the RPC subsystem.
type TxSerializer struct {
	env tabletenv.Env
	// The embedded cache presumably provides the Record() and Items() methods
	// used to publish hot rows at /debug/hotrows (they are not defined in this
	// file) -- TODO confirm against sync2.ConsolidatorCache.
	*sync2.ConsolidatorCache

	// Immutable fields.
	// dryRun is true if hot row protection only records what it would have
	// done instead of queueing or rejecting transactions.
	dryRun bool
	// maxQueueSize limits the queue of a single row (range), maxGlobalQueueSize
	// limits the sum of all queues and concurrentTransactions is the capacity
	// of the slot channel created per hot row (range).
	maxQueueSize           int
	maxGlobalQueueSize     int
	concurrentTransactions int

	// waits stores how many times a transaction was queued because another
	// transaction was already in flight for the same row (range).
	// The key of the map is the table name of the query.
	//
	// waitsDryRun is similar to "waits": In dry-run mode it records how many
	// transactions would have been queued.
	// The key of the map is the table name of the query as well.
	//
	// queueExceeded counts per table how many transactions were rejected because
	// the max queue size per row (range) was exceeded.
	//
	// queueExceededDryRun counts in dry-run mode how many transactions would have
	// been rejected due to exceeding the max queue size per row (range).
	//
	// globalQueueExceeded is the same as queueExceeded but for the global queue.
	// globalQueueExceededDryRun is its dry-run counterpart.
	waits, waitsDryRun, queueExceeded, queueExceededDryRun *stats.CountersWithSingleLabel
	globalQueueExceeded, globalQueueExceededDryRun         *stats.Counter

	// Throttled loggers, one per kind of message, so that a noisy kind does not
	// suppress the others.
	log                          *logutil.ThrottledLogger
	logDryRun                    *logutil.ThrottledLogger
	logWaitsDryRun               *logutil.ThrottledLogger
	logQueueExceededDryRun       *logutil.ThrottledLogger
	logGlobalQueueExceededDryRun *logutil.ThrottledLogger

	// mu guards "queues" (and the counters inside each queue) and "globalSize".
	mu sync.Mutex
	// queues holds the local queue per row (range); the map key is the row
	// range key passed to Wait().
	queues map[string]*queue
	// globalSize is the number of transactions currently tracked across all
	// queues (including the ones which are not waiting).
	globalSize int
}
    94  
    95  // New returns a TxSerializer object.
    96  func New(env tabletenv.Env) *TxSerializer {
    97  	config := env.Config()
    98  	return &TxSerializer{
    99  		env:                    env,
   100  		ConsolidatorCache:      sync2.NewConsolidatorCache(1000),
   101  		dryRun:                 config.HotRowProtection.Mode == tabletenv.Dryrun,
   102  		maxQueueSize:           config.HotRowProtection.MaxQueueSize,
   103  		maxGlobalQueueSize:     config.HotRowProtection.MaxGlobalQueueSize,
   104  		concurrentTransactions: config.HotRowProtection.MaxConcurrency,
   105  		waits: env.Exporter().NewCountersWithSingleLabel(
   106  			"TxSerializerWaits",
   107  			"Number of times a transaction was queued because another transaction was already in flight for the same row range",
   108  			"table_name"),
   109  		waitsDryRun: env.Exporter().NewCountersWithSingleLabel(
   110  			"TxSerializerWaitsDryRun",
   111  			"Dry run number of transactions that would've been queued",
   112  			"table_name"),
   113  		queueExceeded: env.Exporter().NewCountersWithSingleLabel(
   114  			"TxSerializerQueueExceeded",
   115  			"Number of transactions that were rejected because the max queue size per row range was exceeded",
   116  			"table_name"),
   117  		queueExceededDryRun: env.Exporter().NewCountersWithSingleLabel(
   118  			"TxSerializerQueueExceededDryRun",
   119  			"Dry-run Number of transactions that were rejected because the max queue size was exceeded",
   120  			"table_name"),
   121  		globalQueueExceeded: env.Exporter().NewCounter(
   122  			"TxSerializerGlobalQueueExceeded",
   123  			"Number of transactions that were rejected on the global queue because of exceeding the max queue size per row range"),
   124  		globalQueueExceededDryRun: env.Exporter().NewCounter(
   125  			"TxSerializerGlobalQueueExceededDryRun",
   126  			"Dry-run stats for TxSerializerGlobalQueueExceeded"),
   127  		log:                          logutil.NewThrottledLogger("HotRowProtection", 5*time.Second),
   128  		logDryRun:                    logutil.NewThrottledLogger("HotRowProtection DryRun", 5*time.Second),
   129  		logWaitsDryRun:               logutil.NewThrottledLogger("HotRowProtection Waits DryRun", 5*time.Second),
   130  		logQueueExceededDryRun:       logutil.NewThrottledLogger("HotRowProtection QueueExceeded DryRun", 5*time.Second),
   131  		logGlobalQueueExceededDryRun: logutil.NewThrottledLogger("HotRowProtection GlobalQueueExceeded DryRun", 5*time.Second),
   132  		queues:                       make(map[string]*queue),
   133  	}
   134  
   135  }
   136  
// DoneFunc is returned by Wait() and must be called by the caller once its
// transaction is done, so that the next waiting transaction can be unblocked.
type DoneFunc func()
   139  
   140  // Wait blocks if another transaction for the same range is already in flight.
   141  // It returns when this transaction has its turn.
   142  // "done" is != nil if err == nil and must be called once the transaction is
   143  // done and the next waiting transaction can be unblocked.
   144  // "waited" is true if Wait() had to wait for other transactions.
   145  // "err" is not nil if a) the context is done or b) a queue limit was reached.
   146  func (txs *TxSerializer) Wait(ctx context.Context, key, table string) (done DoneFunc, waited bool, err error) {
   147  	txs.mu.Lock()
   148  	defer txs.mu.Unlock()
   149  
   150  	waited, err = txs.lockLocked(ctx, key, table)
   151  	if err != nil {
   152  		if waited {
   153  			// Waiting failed early e.g. due a canceled context and we did NOT get the
   154  			// slot. Call "done" now because we don'txs return it to the caller.
   155  			txs.unlockLocked(key, false /* returnSlot */)
   156  		}
   157  		return nil, waited, err
   158  	}
   159  	return func() { txs.unlock(key) }, waited, nil
   160  }
   161  
   162  // lockLocked queues this transaction. It will unblock immediately if this
   163  // transaction is the first in the queue or when it acquired a slot.
   164  // The method has the suffix "Locked" to clarify that "txs.mu" must be locked.
   165  func (txs *TxSerializer) lockLocked(ctx context.Context, key, table string) (bool, error) {
   166  	q, ok := txs.queues[key]
   167  	if !ok {
   168  		// First transaction in the queue i.e. we don't wait and return immediately.
   169  		txs.queues[key] = newQueueForFirstTransaction(txs.concurrentTransactions)
   170  		txs.globalSize++
   171  		return false, nil
   172  	}
   173  
   174  	if txs.globalSize >= txs.maxGlobalQueueSize {
   175  		if txs.dryRun {
   176  			txs.globalQueueExceededDryRun.Add(1)
   177  			txs.logGlobalQueueExceededDryRun.Warningf("Would have rejected BeginExecute RPC because there are too many queued transactions (%d >= %d)", txs.globalSize, txs.maxGlobalQueueSize)
   178  		} else {
   179  			txs.globalQueueExceeded.Add(1)
   180  			return false, vterrors.Errorf(vtrpcpb.Code_RESOURCE_EXHAUSTED,
   181  				"hot row protection: too many queued transactions (%d >= %d)", txs.globalSize, txs.maxGlobalQueueSize)
   182  		}
   183  	}
   184  
   185  	if q.size >= txs.maxQueueSize {
   186  		if txs.dryRun {
   187  			txs.queueExceededDryRun.Add(table, 1)
   188  			if txs.env.Config().SanitizeLogMessages {
   189  				txs.logQueueExceededDryRun.Warningf("Would have rejected BeginExecute RPC because there are too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, txs.sanitizeKey(key))
   190  			} else {
   191  				txs.logQueueExceededDryRun.Warningf("Would have rejected BeginExecute RPC because there are too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, key)
   192  			}
   193  		} else {
   194  			txs.queueExceeded.Add(table, 1)
   195  			if txs.env.Config().TerseErrors {
   196  				return false, vterrors.Errorf(vtrpcpb.Code_RESOURCE_EXHAUSTED,
   197  					"hot row protection: too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, txs.sanitizeKey(key))
   198  			}
   199  			return false, vterrors.Errorf(vtrpcpb.Code_RESOURCE_EXHAUSTED,
   200  				"hot row protection: too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, key)
   201  		}
   202  	}
   203  
   204  	if q.availableSlots == nil {
   205  		// Hot row detected: A second, concurrent transaction is seen for the
   206  		// first time.
   207  
   208  		// As an optimization, we deferred the creation of the channel until now.
   209  		q.availableSlots = make(chan struct{}, txs.concurrentTransactions)
   210  		q.availableSlots <- struct{}{}
   211  
   212  		// Include first transaction in the count at /debug/hotrows. (It was not
   213  		// recorded on purpose because it did not wait.)
   214  		txs.Record(key)
   215  	}
   216  
   217  	txs.globalSize++
   218  	q.size++
   219  	q.count++
   220  	if q.size > q.max {
   221  		q.max = q.size
   222  	}
   223  	// Publish the number of waits at /debug/hotrows.
   224  	txs.Record(key)
   225  
   226  	if txs.dryRun {
   227  		txs.waitsDryRun.Add(table, 1)
   228  		if txs.env.Config().SanitizeLogMessages {
   229  			txs.logWaitsDryRun.Warningf("Would have queued BeginExecute RPC for row (range): '%v' because another transaction to the same range is already in progress.", txs.sanitizeKey(key))
   230  		} else {
   231  			txs.logWaitsDryRun.Warningf("Would have queued BeginExecute RPC for row (range): '%v' because another transaction to the same range is already in progress.", key)
   232  		}
   233  		return false, nil
   234  	}
   235  
   236  	// Unlock before the wait and relock before returning because our caller
   237  	// Wait() holds the lock and assumes it still has it.
   238  	txs.mu.Unlock()
   239  	defer txs.mu.Lock()
   240  
   241  	// Non-blocking write attempt to get a slot.
   242  	select {
   243  	case q.availableSlots <- struct{}{}:
   244  		// Return waited=false because a slot was immediately available.
   245  		return false, nil
   246  	default:
   247  	}
   248  
   249  	// Blocking wait for the next available slot.
   250  	txs.waits.Add(table, 1)
   251  	select {
   252  	case q.availableSlots <- struct{}{}:
   253  		return true, nil
   254  	case <-ctx.Done():
   255  		return true, ctx.Err()
   256  	}
   257  }
   258  
   259  func (txs *TxSerializer) unlock(key string) {
   260  	txs.mu.Lock()
   261  	defer txs.mu.Unlock()
   262  
   263  	txs.unlockLocked(key, true)
   264  }
   265  
   266  func (txs *TxSerializer) unlockLocked(key string, returnSlot bool) {
   267  	q := txs.queues[key]
   268  	q.size--
   269  	txs.globalSize--
   270  
   271  	if q.size == 0 {
   272  		// This is the last transaction in flight.
   273  		delete(txs.queues, key)
   274  
   275  		if q.max > 1 {
   276  			var logMsg string
   277  			if txs.env.Config().SanitizeLogMessages {
   278  				logMsg = fmt.Sprintf("%v simultaneous transactions (%v in total) for the same row range (%v) would have been queued.", q.max, q.count, txs.sanitizeKey(key))
   279  			} else {
   280  				logMsg = fmt.Sprintf("%v simultaneous transactions (%v in total) for the same row range (%v) would have been queued.", q.max, q.count, key)
   281  			}
   282  			if txs.dryRun {
   283  				txs.logDryRun.Infof(logMsg)
   284  			} else {
   285  				txs.log.Infof(logMsg)
   286  			}
   287  		}
   288  
   289  		// Return early because the queue "q" for this "key" will not be used any
   290  		// more.
   291  		// We intentionally skip returning the last slot and closing the
   292  		// "availableSlots" channel because it is not required by Go.
   293  		return
   294  	}
   295  
   296  	// Give up slot by removing ourselves from the channel.
   297  	// Wakes up the next queued transaction.
   298  
   299  	if txs.dryRun {
   300  		// Dry-run did not acquire a slot in the first place.
   301  		return
   302  	}
   303  
   304  	if !returnSlot {
   305  		// We did not acquire a slot in the first place e.g. due to a canceled context.
   306  		return
   307  	}
   308  
   309  	// This should never block.
   310  	<-q.availableSlots
   311  }
   312  
   313  // Pending returns the number of queued transactions (including the ones which
   314  // are currently in flight.)
   315  func (txs *TxSerializer) Pending(key string) int {
   316  	txs.mu.Lock()
   317  	defer txs.mu.Unlock()
   318  
   319  	q, ok := txs.queues[key]
   320  	if !ok {
   321  		return 0
   322  	}
   323  	return q.size
   324  }
   325  
   326  // ServeHTTP lists the most recent, cached queries and their count.
   327  func (txs *TxSerializer) ServeHTTP(response http.ResponseWriter, request *http.Request) {
   328  	if streamlog.GetRedactDebugUIQueries() {
   329  		response.Write([]byte(`
   330  	<!DOCTYPE html>
   331  	<html>
   332  	<body>
   333  	<h1>Redacted</h1>
   334  	<p>/debug/hotrows has been redacted for your protection</p>
   335  	</body>
   336  	</html>
   337  		`))
   338  		return
   339  	}
   340  
   341  	if err := acl.CheckAccessHTTP(request, acl.DEBUGGING); err != nil {
   342  		acl.SendError(response, err)
   343  		return
   344  	}
   345  	items := txs.Items()
   346  	response.Header().Set("Content-Type", "text/plain")
   347  	if items == nil {
   348  		response.Write([]byte("empty\n"))
   349  		return
   350  	}
   351  	response.Write([]byte(fmt.Sprintf("Length: %d\n", len(items))))
   352  	for _, v := range items {
   353  		response.Write([]byte(fmt.Sprintf("%v: %s\n", v.Count, v.Query)))
   354  	}
   355  }
   356  
// queue represents the local queue for a particular row (range).
//
// Note that we don't use a dedicated queue structure for all waiting
// transactions. Instead, we leverage that goroutines waiting for a channel
// are woken up in the order they are queued up. The "availableSlots" field is
// said channel which has n free slots (for the number of concurrent
// transactions which can access the tx pool). All queued transactions are
// competing for these slots and try to add themselves to the channel.
type queue struct {
	// NOTE: The following fields are guarded by TxSerializer.mu.
	// size counts how many transactions are currently queued/in flight (includes
	// the transactions which are not waiting.)
	size int
	// count is the total number of transactions which ever entered this queue.
	// It is incremented together with "size", but never gets decremented.
	count int
	// max is the max of "size", i.e. the maximum number of transactions which
	// were simultaneously queued for the same row range.
	max int

	// availableSlots limits the number of concurrent transactions *per*
	// hot row (range). It holds one element for each allowed pending
	// transaction i.e. consumed tx pool slot. Consequently, if the channel
	// is full, subsequent transactions have to wait until they can place
	// their entry here.
	// NOTE: As an optimization, we defer the creation of the channel until
	// a second transaction for the same hot row is running. Until then the
	// field is nil.
	availableSlots chan struct{}
}
   385  
   386  func newQueueForFirstTransaction(concurrentTransactions int) *queue {
   387  	return &queue{
   388  		size:  1,
   389  		count: 1,
   390  		max:   1,
   391  	}
   392  }
   393  
   394  // sanitizeKey takes the internal key and returns one that has potentially
   395  // sensitive info removed.
   396  // This is needed because the internal key is e.g. 'tbl1 where col1="foo"'
   397  // and the WHERE clause can contain sensitive information that should not
   398  // be shown so we we strip everything after the first WHERE keyword.
   399  // e.g. 'tbl1 where col1="foo" and col2="bar"' -> 'tbl1 ... [REDACTED]'
   400  func (txs *TxSerializer) sanitizeKey(key string) string {
   401  	var sanitizedKey string
   402  	whereLoc := strings.Index(strings.ToLower(key), "where")
   403  	if whereLoc != -1 {
   404  		sanitizedKey = key[:whereLoc] + "... [REDACTED]"
   405  	} else {
   406  		sanitizedKey = key
   407  	}
   408  	return sanitizedKey
   409  }