vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/txserializer/tx_serializer.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package txserializer provides the vttablet hot row protection. 18 // See the TxSerializer struct for details. 19 package txserializer 20 21 import ( 22 "fmt" 23 "net/http" 24 "strings" 25 "sync" 26 "time" 27 28 "context" 29 30 "vitess.io/vitess/go/acl" 31 "vitess.io/vitess/go/stats" 32 "vitess.io/vitess/go/streamlog" 33 "vitess.io/vitess/go/sync2" 34 "vitess.io/vitess/go/vt/logutil" 35 "vitess.io/vitess/go/vt/vterrors" 36 "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" 37 38 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 39 ) 40 41 // TxSerializer serializes incoming transactions which target the same row range 42 // i.e. table name and WHERE clause are identical. 43 // Additional transactions are queued and woken up in arrival order. 44 // 45 // This implementation has some parallels to the sync2.Consolidator class. 46 // However, there are many substantial differences: 47 // - Results are not shared between queued transactions. 48 // - Only one waiting transaction and not all are notified when the current one 49 // has finished. 50 // - Waiting transactions are woken up in FIFO order. 51 // - Waiting transactions are unblocked if their context is done. 52 // - Both the local queue (per row range) and global queue (whole process) are 53 // limited to avoid that queued transactions can consume the full capacity 54 // of vttablet. This is important if the capaciy is finite. For example, the 55 // number of RPCs in flight could be limited by the RPC subsystem. 56 type TxSerializer struct { 57 env tabletenv.Env 58 *sync2.ConsolidatorCache 59 60 // Immutable fields. 61 dryRun bool 62 maxQueueSize int 63 maxGlobalQueueSize int 64 concurrentTransactions int 65 66 // waits stores how many times a transaction was queued because another 67 // transaction was already in flight for the same row (range). 68 // The key of the map is the table name of the query. 69 // 70 // waitsDryRun is similar as "waits": In dry-run mode it records how many 71 // transactions would have been queued. 72 // The key of the map is the table name and WHERE clause. 73 // 74 // queueExceeded counts per table how many transactions were rejected because 75 // the max queue size per row (range) was exceeded. 76 // 77 // queueExceededDryRun counts in dry-run mode how many transactions would have 78 // been rejected due to exceeding the max queue size per row (range). 79 // 80 // globalQueueExceeded is the same as queueExceeded but for the global queue. 81 waits, waitsDryRun, queueExceeded, queueExceededDryRun *stats.CountersWithSingleLabel 82 globalQueueExceeded, globalQueueExceededDryRun *stats.Counter 83 84 log *logutil.ThrottledLogger 85 logDryRun *logutil.ThrottledLogger 86 logWaitsDryRun *logutil.ThrottledLogger 87 logQueueExceededDryRun *logutil.ThrottledLogger 88 logGlobalQueueExceededDryRun *logutil.ThrottledLogger 89 90 mu sync.Mutex 91 queues map[string]*queue 92 globalSize int 93 } 94 95 // New returns a TxSerializer object. 96 func New(env tabletenv.Env) *TxSerializer { 97 config := env.Config() 98 return &TxSerializer{ 99 env: env, 100 ConsolidatorCache: sync2.NewConsolidatorCache(1000), 101 dryRun: config.HotRowProtection.Mode == tabletenv.Dryrun, 102 maxQueueSize: config.HotRowProtection.MaxQueueSize, 103 maxGlobalQueueSize: config.HotRowProtection.MaxGlobalQueueSize, 104 concurrentTransactions: config.HotRowProtection.MaxConcurrency, 105 waits: env.Exporter().NewCountersWithSingleLabel( 106 "TxSerializerWaits", 107 "Number of times a transaction was queued because another transaction was already in flight for the same row range", 108 "table_name"), 109 waitsDryRun: env.Exporter().NewCountersWithSingleLabel( 110 "TxSerializerWaitsDryRun", 111 "Dry run number of transactions that would've been queued", 112 "table_name"), 113 queueExceeded: env.Exporter().NewCountersWithSingleLabel( 114 "TxSerializerQueueExceeded", 115 "Number of transactions that were rejected because the max queue size per row range was exceeded", 116 "table_name"), 117 queueExceededDryRun: env.Exporter().NewCountersWithSingleLabel( 118 "TxSerializerQueueExceededDryRun", 119 "Dry-run Number of transactions that were rejected because the max queue size was exceeded", 120 "table_name"), 121 globalQueueExceeded: env.Exporter().NewCounter( 122 "TxSerializerGlobalQueueExceeded", 123 "Number of transactions that were rejected on the global queue because of exceeding the max queue size per row range"), 124 globalQueueExceededDryRun: env.Exporter().NewCounter( 125 "TxSerializerGlobalQueueExceededDryRun", 126 "Dry-run stats for TxSerializerGlobalQueueExceeded"), 127 log: logutil.NewThrottledLogger("HotRowProtection", 5*time.Second), 128 logDryRun: logutil.NewThrottledLogger("HotRowProtection DryRun", 5*time.Second), 129 logWaitsDryRun: logutil.NewThrottledLogger("HotRowProtection Waits DryRun", 5*time.Second), 130 logQueueExceededDryRun: logutil.NewThrottledLogger("HotRowProtection QueueExceeded DryRun", 5*time.Second), 131 logGlobalQueueExceededDryRun: logutil.NewThrottledLogger("HotRowProtection GlobalQueueExceeded DryRun", 5*time.Second), 132 queues: make(map[string]*queue), 133 } 134 135 } 136 137 // DoneFunc is returned by Wait() and must be called by the caller. 138 type DoneFunc func() 139 140 // Wait blocks if another transaction for the same range is already in flight. 141 // It returns when this transaction has its turn. 142 // "done" is != nil if err == nil and must be called once the transaction is 143 // done and the next waiting transaction can be unblocked. 144 // "waited" is true if Wait() had to wait for other transactions. 145 // "err" is not nil if a) the context is done or b) a queue limit was reached. 146 func (txs *TxSerializer) Wait(ctx context.Context, key, table string) (done DoneFunc, waited bool, err error) { 147 txs.mu.Lock() 148 defer txs.mu.Unlock() 149 150 waited, err = txs.lockLocked(ctx, key, table) 151 if err != nil { 152 if waited { 153 // Waiting failed early e.g. due a canceled context and we did NOT get the 154 // slot. Call "done" now because we don'txs return it to the caller. 155 txs.unlockLocked(key, false /* returnSlot */) 156 } 157 return nil, waited, err 158 } 159 return func() { txs.unlock(key) }, waited, nil 160 } 161 162 // lockLocked queues this transaction. It will unblock immediately if this 163 // transaction is the first in the queue or when it acquired a slot. 164 // The method has the suffix "Locked" to clarify that "txs.mu" must be locked. 165 func (txs *TxSerializer) lockLocked(ctx context.Context, key, table string) (bool, error) { 166 q, ok := txs.queues[key] 167 if !ok { 168 // First transaction in the queue i.e. we don't wait and return immediately. 169 txs.queues[key] = newQueueForFirstTransaction(txs.concurrentTransactions) 170 txs.globalSize++ 171 return false, nil 172 } 173 174 if txs.globalSize >= txs.maxGlobalQueueSize { 175 if txs.dryRun { 176 txs.globalQueueExceededDryRun.Add(1) 177 txs.logGlobalQueueExceededDryRun.Warningf("Would have rejected BeginExecute RPC because there are too many queued transactions (%d >= %d)", txs.globalSize, txs.maxGlobalQueueSize) 178 } else { 179 txs.globalQueueExceeded.Add(1) 180 return false, vterrors.Errorf(vtrpcpb.Code_RESOURCE_EXHAUSTED, 181 "hot row protection: too many queued transactions (%d >= %d)", txs.globalSize, txs.maxGlobalQueueSize) 182 } 183 } 184 185 if q.size >= txs.maxQueueSize { 186 if txs.dryRun { 187 txs.queueExceededDryRun.Add(table, 1) 188 if txs.env.Config().SanitizeLogMessages { 189 txs.logQueueExceededDryRun.Warningf("Would have rejected BeginExecute RPC because there are too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, txs.sanitizeKey(key)) 190 } else { 191 txs.logQueueExceededDryRun.Warningf("Would have rejected BeginExecute RPC because there are too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, key) 192 } 193 } else { 194 txs.queueExceeded.Add(table, 1) 195 if txs.env.Config().TerseErrors { 196 return false, vterrors.Errorf(vtrpcpb.Code_RESOURCE_EXHAUSTED, 197 "hot row protection: too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, txs.sanitizeKey(key)) 198 } 199 return false, vterrors.Errorf(vtrpcpb.Code_RESOURCE_EXHAUSTED, 200 "hot row protection: too many queued transactions (%d >= %d) for the same row (table + WHERE clause: '%v')", q.size, txs.maxQueueSize, key) 201 } 202 } 203 204 if q.availableSlots == nil { 205 // Hot row detected: A second, concurrent transaction is seen for the 206 // first time. 207 208 // As an optimization, we deferred the creation of the channel until now. 209 q.availableSlots = make(chan struct{}, txs.concurrentTransactions) 210 q.availableSlots <- struct{}{} 211 212 // Include first transaction in the count at /debug/hotrows. (It was not 213 // recorded on purpose because it did not wait.) 214 txs.Record(key) 215 } 216 217 txs.globalSize++ 218 q.size++ 219 q.count++ 220 if q.size > q.max { 221 q.max = q.size 222 } 223 // Publish the number of waits at /debug/hotrows. 224 txs.Record(key) 225 226 if txs.dryRun { 227 txs.waitsDryRun.Add(table, 1) 228 if txs.env.Config().SanitizeLogMessages { 229 txs.logWaitsDryRun.Warningf("Would have queued BeginExecute RPC for row (range): '%v' because another transaction to the same range is already in progress.", txs.sanitizeKey(key)) 230 } else { 231 txs.logWaitsDryRun.Warningf("Would have queued BeginExecute RPC for row (range): '%v' because another transaction to the same range is already in progress.", key) 232 } 233 return false, nil 234 } 235 236 // Unlock before the wait and relock before returning because our caller 237 // Wait() holds the lock and assumes it still has it. 238 txs.mu.Unlock() 239 defer txs.mu.Lock() 240 241 // Non-blocking write attempt to get a slot. 242 select { 243 case q.availableSlots <- struct{}{}: 244 // Return waited=false because a slot was immediately available. 245 return false, nil 246 default: 247 } 248 249 // Blocking wait for the next available slot. 250 txs.waits.Add(table, 1) 251 select { 252 case q.availableSlots <- struct{}{}: 253 return true, nil 254 case <-ctx.Done(): 255 return true, ctx.Err() 256 } 257 } 258 259 func (txs *TxSerializer) unlock(key string) { 260 txs.mu.Lock() 261 defer txs.mu.Unlock() 262 263 txs.unlockLocked(key, true) 264 } 265 266 func (txs *TxSerializer) unlockLocked(key string, returnSlot bool) { 267 q := txs.queues[key] 268 q.size-- 269 txs.globalSize-- 270 271 if q.size == 0 { 272 // This is the last transaction in flight. 273 delete(txs.queues, key) 274 275 if q.max > 1 { 276 var logMsg string 277 if txs.env.Config().SanitizeLogMessages { 278 logMsg = fmt.Sprintf("%v simultaneous transactions (%v in total) for the same row range (%v) would have been queued.", q.max, q.count, txs.sanitizeKey(key)) 279 } else { 280 logMsg = fmt.Sprintf("%v simultaneous transactions (%v in total) for the same row range (%v) would have been queued.", q.max, q.count, key) 281 } 282 if txs.dryRun { 283 txs.logDryRun.Infof(logMsg) 284 } else { 285 txs.log.Infof(logMsg) 286 } 287 } 288 289 // Return early because the queue "q" for this "key" will not be used any 290 // more. 291 // We intentionally skip returning the last slot and closing the 292 // "availableSlots" channel because it is not required by Go. 293 return 294 } 295 296 // Give up slot by removing ourselves from the channel. 297 // Wakes up the next queued transaction. 298 299 if txs.dryRun { 300 // Dry-run did not acquire a slot in the first place. 301 return 302 } 303 304 if !returnSlot { 305 // We did not acquire a slot in the first place e.g. due to a canceled context. 306 return 307 } 308 309 // This should never block. 310 <-q.availableSlots 311 } 312 313 // Pending returns the number of queued transactions (including the ones which 314 // are currently in flight.) 315 func (txs *TxSerializer) Pending(key string) int { 316 txs.mu.Lock() 317 defer txs.mu.Unlock() 318 319 q, ok := txs.queues[key] 320 if !ok { 321 return 0 322 } 323 return q.size 324 } 325 326 // ServeHTTP lists the most recent, cached queries and their count. 327 func (txs *TxSerializer) ServeHTTP(response http.ResponseWriter, request *http.Request) { 328 if streamlog.GetRedactDebugUIQueries() { 329 response.Write([]byte(` 330 <!DOCTYPE html> 331 <html> 332 <body> 333 <h1>Redacted</h1> 334 <p>/debug/hotrows has been redacted for your protection</p> 335 </body> 336 </html> 337 `)) 338 return 339 } 340 341 if err := acl.CheckAccessHTTP(request, acl.DEBUGGING); err != nil { 342 acl.SendError(response, err) 343 return 344 } 345 items := txs.Items() 346 response.Header().Set("Content-Type", "text/plain") 347 if items == nil { 348 response.Write([]byte("empty\n")) 349 return 350 } 351 response.Write([]byte(fmt.Sprintf("Length: %d\n", len(items)))) 352 for _, v := range items { 353 response.Write([]byte(fmt.Sprintf("%v: %s\n", v.Count, v.Query))) 354 } 355 } 356 357 // queue represents the local queue for a particular row (range). 358 // 359 // Note that we don't use a dedicated queue structure for all waiting 360 // transactions. Instead, we leverage that Go routines waiting for a channel 361 // are woken up in the order they are queued up. The "availableSlots" field is 362 // said channel which has n free slots (for the number of concurrent 363 // transactions which can access the tx pool). All queued transactions are 364 // competing for these slots and try to add themselves to the channel. 365 type queue struct { 366 // NOTE: The following fields are guarded by TxSerializer.mu. 367 // size counts how many transactions are currently queued/in flight (includes 368 // the transactions which are not waiting.) 369 size int 370 // count is the same as "size", but never gets decremented. 371 count int 372 // max is the max of "size", i.e. the maximum number of transactions which 373 // were simultaneously queued for the same row range. 374 max int 375 376 // availableSlots limits the number of concurrent transactions *per* 377 // hot row (range). It holds one element for each allowed pending 378 // transaction i.e. consumed tx pool slot. Consequently, if the channel 379 // is full, subsequent transactions have to wait until they can place 380 // their entry here. 381 // NOTE: As an optimization, we defer the creation of the channel until 382 // a second transaction for the same hot row is running. 383 availableSlots chan struct{} 384 } 385 386 func newQueueForFirstTransaction(concurrentTransactions int) *queue { 387 return &queue{ 388 size: 1, 389 count: 1, 390 max: 1, 391 } 392 } 393 394 // sanitizeKey takes the internal key and returns one that has potentially 395 // sensitive info removed. 396 // This is needed because the internal key is e.g. 'tbl1 where col1="foo"' 397 // and the WHERE clause can contain sensitive information that should not 398 // be shown so we we strip everything after the first WHERE keyword. 399 // e.g. 'tbl1 where col1="foo" and col2="bar"' -> 'tbl1 ... [REDACTED]' 400 func (txs *TxSerializer) sanitizeKey(key string) string { 401 var sanitizedKey string 402 whereLoc := strings.Index(strings.ToLower(key), "where") 403 if whereLoc != -1 { 404 sanitizedKey = key[:whereLoc] + "... [REDACTED]" 405 } else { 406 sanitizedKey = key 407 } 408 return sanitizedKey 409 }