github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/einsteindb/txn.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package einsteindb

import (
	"bytes"
	"context"
	"fmt"
	"runtime/trace"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/dgryski/go-farm"
	"github.com/opentracing/opentracing-go"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"go.uber.org/zap"
)

var (
	_ ekv.Transaction = (*einsteindbTxn)(nil)
)

var (
	einsteindbTxnCmdHistogramWithCommit   = metrics.EinsteinDBTxnCmdHistogram.WithLabelValues(metrics.LblCommit)
	einsteindbTxnCmdHistogramWithRollback = metrics.EinsteinDBTxnCmdHistogram.WithLabelValues(metrics.LblRollback)
	einsteindbTxnCmdHistogramWithBatchGet = metrics.EinsteinDBTxnCmdHistogram.WithLabelValues(metrics.LblBatchGet)
	einsteindbTxnCmdHistogramWithGet      = metrics.EinsteinDBTxnCmdHistogram.WithLabelValues(metrics.LblGet)
)

// SchemaAmender is used by pessimistic transactions to amend commit mutations for schemaReplicant changes during 2PC.
type SchemaAmender interface {
	// AmendTxn is the amend entry point. New mutations are generated from the input mutations using the
	// schemaReplicant change info. The returned results are the mutations that need to be prewritten and
	// the mutations that need to be cleaned up.
	AmendTxn(ctx context.Context, startSchemaReplicant SchemaVer, change *RelatedSchemaChange, mutations CommitterMutations) (*CommitterMutations, error)
}

// einsteindbTxn implements ekv.Transaction.
type einsteindbTxn struct {
	snapshot    *einsteindbSnapshot
	us          ekv.UnionStore
	causetstore *einsteindbStore // for connection to region.
	startTS     uint64
	startTime   time.Time // Monotonic timestamp for recording txn time consumption.
	commitTS    uint64
	mu          sync.Mutex // For the thread-safe LockKeys function.
	setCnt      int64
	vars        *ekv.Variables
	committer   *twoPhaseCommitter
	lockedCnt   int

	// For data consistency checks.
	// assertions[:confirmed] holds the assertions of the current transaction.
	// assertions[confirmed:len(assertions)] holds the assertions of the current memex.
	// StmtCommit/StmtRollback may change the confirmed position.
	assertions []assertionPair
	confirmed  int

	valid bool
	dirty bool

	// txnSchemaReplicant is the schemaReplicant fetched at startTS.
	txnSchemaReplicant SchemaVer
	// schemaAmender is used to amend pessimistic txn commit mutations for schemaReplicant changes.
	schemaAmender SchemaAmender
	// commitCallback is called after the current transaction is committed.
	commitCallback func(info ekv.TxnInfo, err error)
}
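
// Lifecycle sketch (illustrative; not part of the original file, error handling
// elided): a transaction is created with a startTS fetched from the timestamp
// oracle, buffers its writes in the union store's MemBuffer, and finally either
// commits via two-phase commit or rolls back:
//
//	txn, _ := newEinsteinDBTxn(causetstore) // allocates startTS with backoff/retry
//	_ = txn.Set(ekv.Key("k"), []byte("v"))  // buffered locally; nothing is sent to einsteindb yet
//	_ = txn.Commit(context.Background())    // runs 2PC; use txn.Rollback() to discard instead
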
func newEinsteinDBTxn(causetstore *einsteindbStore) (*einsteindbTxn, error) {
	bo := NewBackofferWithVars(context.Background(), tsoMaxBackoff, nil)
	startTS, err := causetstore.getTimestampWithRetry(bo)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return newEinsteinDBTxnWithStartTS(causetstore, startTS, causetstore.nextReplicaReadSeed())
}

// newEinsteinDBTxnWithStartTS creates a txn with startTS.
func newEinsteinDBTxnWithStartTS(causetstore *einsteindbStore, startTS uint64, replicaReadSeed uint32) (*einsteindbTxn, error) {
	ver := ekv.NewVersion(startTS)
	snapshot := newEinsteinDBSnapshot(causetstore, ver, replicaReadSeed)
	return &einsteindbTxn{
		snapshot:    snapshot,
		us:          ekv.NewUnionStore(snapshot),
		causetstore: causetstore,
		startTS:     startTS,
		startTime:   time.Now(),
		valid:       true,
		vars:        ekv.DefaultVars,
	}, nil
}

type assertionPair struct {
	key       ekv.Key
	assertion ekv.AssertionType
}

func (a assertionPair) String() string {
	return fmt.Sprintf("key: %s, assertion type: %d", a.key, a.assertion)
}

// SetSuccess is used to probe whether ekv variables are set. It is ONLY used in test cases.
var SetSuccess = false

func (txn *einsteindbTxn) SetVars(vars *ekv.Variables) {
	txn.vars = vars
	txn.snapshot.vars = vars
	failpoint.Inject("probeSetVars", func(val failpoint.Value) {
		if val.(bool) {
			SetSuccess = true
		}
	})
}

func (txn *einsteindbTxn) GetVars() *ekv.Variables {
	return txn.vars
}

// Get implements the transaction interface.
func (txn *einsteindbTxn) Get(ctx context.Context, k ekv.Key) ([]byte, error) {
	ret, err := txn.us.Get(ctx, k)
	if ekv.IsErrNotFound(err) {
		return nil, err
	}
	if err != nil {
		return nil, errors.Trace(err)
	}

	return ret, nil
}

func (txn *einsteindbTxn) BatchGet(ctx context.Context, keys []ekv.Key) (map[string][]byte, error) {
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("einsteindbTxn.BatchGet", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}
	return ekv.NewBufferBatchGetter(txn.GetMemBuffer(), nil, txn.snapshot).BatchGet(ctx, keys)
}

func (txn *einsteindbTxn) Set(k ekv.Key, v []byte) error {
	txn.setCnt++
	return txn.us.GetMemBuffer().Set(k, v)
}

func (txn *einsteindbTxn) String() string {
	return fmt.Sprintf("%d", txn.StartTS())
}

func (txn *einsteindbTxn) Iter(k ekv.Key, upperBound ekv.Key) (ekv.Iterator, error) {
	return txn.us.Iter(k, upperBound)
}
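
// Read-path sketch (illustrative): reads consult the transaction's own buffered
// writes before falling back to the startTS snapshot, so a transaction observes
// its own uncommitted mutations. BatchGet gets the same overlay semantics from
// ekv.NewBufferBatchGetter above:
//
//	_ = txn.Set(ekv.Key("a"), []byte("1"))
//	v, _ := txn.Get(ctx, ekv.Key("a")) // "1", served from the MemBuffer, not from einsteindb
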
// IterReverse creates a reversed Iterator positioned on the first entry whose key is less than k.
func (txn *einsteindbTxn) IterReverse(k ekv.Key) (ekv.Iterator, error) {
	return txn.us.IterReverse(k)
}

func (txn *einsteindbTxn) Delete(k ekv.Key) error {
	return txn.us.GetMemBuffer().Delete(k)
}

func (txn *einsteindbTxn) SetOption(opt ekv.Option, val interface{}) {
	txn.us.SetOption(opt, val)
	txn.snapshot.SetOption(opt, val)
	switch opt {
	case ekv.SchemaReplicant:
		txn.txnSchemaReplicant = val.(SchemaVer)
	case ekv.SchemaAmender:
		txn.schemaAmender = val.(SchemaAmender)
	case ekv.CommitHook:
		txn.commitCallback = val.(func(info ekv.TxnInfo, err error))
	}
}

func (txn *einsteindbTxn) DelOption(opt ekv.Option) {
	txn.us.DelOption(opt)
}

func (txn *einsteindbTxn) IsPessimistic() bool {
	return txn.us.GetOption(ekv.Pessimistic) != nil
}
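
// Option-plumbing sketch (illustrative): SetOption forwards every option to both
// the union store and the snapshot, and additionally caches the three options that
// the transaction consumes itself; IsPessimistic reads the option back from the
// union store. The callback below is a hypothetical value, shown only for the shape:
//
//	txn.SetOption(ekv.Pessimistic, true) // IsPessimistic() now returns true
//	txn.SetOption(ekv.CommitHook, func(info ekv.TxnInfo, err error) { /* inspect commit result */ })
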
func (txn *einsteindbTxn) Commit(ctx context.Context) error {
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("einsteindbTxn.Commit", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}
	defer trace.StartRegion(ctx, "CommitTxn").End()

	if !txn.valid {
		return ekv.ErrInvalidTxn
	}
	defer txn.close()

	failpoint.Inject("mockCommitError", func(val failpoint.Value) {
		if val.(bool) && ekv.IsMockCommitErrorEnable() {
			ekv.MockCommitErrorDisable()
			failpoint.Return(errors.New("mock commit error"))
		}
	})

	start := time.Now()
	defer func() { einsteindbTxnCmdHistogramWithCommit.Observe(time.Since(start).Seconds()) }()

	// connID is used for logging.
	var connID uint64
	val := ctx.Value(stochastikctx.ConnID)
	if val != nil {
		connID = val.(uint64)
	}

	var err error
	// If the txn uses a pessimistic dagger, the committer is already initialized.
	committer := txn.committer
	if committer == nil {
		committer, err = newTwoPhaseCommitter(txn, connID)
		if err != nil {
			return errors.Trace(err)
		}
	}
	defer func() {
		// For async commit transactions, the ttl manager will be closed in the asynchronous commit goroutine.
		if !committer.isAsyncCommit() {
			committer.ttlManager.close()
		}
	}()

	initRegion := trace.StartRegion(ctx, "InitKeys")
	err = committer.initKeysAndMutations()
	initRegion.End()
	if err != nil {
		return errors.Trace(err)
	}
	if committer.mutations.len() == 0 {
		return nil
	}

	defer func() {
		ctxValue := ctx.Value(execdetails.CommitDetailCtxKey)
		if ctxValue != nil {
			commitDetail := ctxValue.(**execdetails.CommitDetails)
			if *commitDetail != nil {
				(*commitDetail).TxnRetry++
			} else {
				*commitDetail = committer.getDetail()
			}
		}
	}()

	// Latches disabled:
	// pessimistic transactions should also bypass the latch.
	if txn.causetstore.txnLatches == nil || txn.IsPessimistic() {
		err = committer.execute(ctx)
		if val == nil || connID > 0 {
			txn.onCommitted(err)
		}
		logutil.Logger(ctx).Debug("[ekv] txnLatches disabled, 2pc directly", zap.Error(err))
		return errors.Trace(err)
	}

	// Latches enabled:
	// for transactions that need to acquire latches.
	start = time.Now()
	dagger := txn.causetstore.txnLatches.Lock(committer.startTS, committer.mutations.keys)
	commitDetail := committer.getDetail()
	commitDetail.LocalLatchTime = time.Since(start)
	if commitDetail.LocalLatchTime > 0 {
		metrics.EinsteinDBLocalLatchWaitTimeHistogram.Observe(commitDetail.LocalLatchTime.Seconds())
	}
	defer txn.causetstore.txnLatches.UnLock(dagger)
	if dagger.IsStale() {
		return ekv.ErrWriteConflictInMilevaDB.FastGenByArgs(txn.startTS)
	}
	err = committer.execute(ctx)
	if val == nil || connID > 0 {
		txn.onCommitted(err)
	}
	if err == nil {
		dagger.SetCommitTS(committer.commitTS)
	}
	logutil.Logger(ctx).Debug("[ekv] txnLatches enabled while txn retryable", zap.Error(err))
	return errors.Trace(err)
}

func (txn *einsteindbTxn) close() {
	txn.valid = false
}

func (txn *einsteindbTxn) Rollback() error {
	if !txn.valid {
		return ekv.ErrInvalidTxn
	}
	start := time.Now()
	// Clean up pessimistic daggers.
	if txn.IsPessimistic() && txn.committer != nil {
		err := txn.rollbackPessimisticLocks()
		txn.committer.ttlManager.close()
		if err != nil {
			logutil.BgLogger().Error(err.Error())
		}
	}
	txn.close()
	logutil.BgLogger().Debug("[ekv] rollback txn", zap.Uint64("txnStartTS", txn.StartTS()))
	einsteindbTxnCmdHistogramWithRollback.Observe(time.Since(start).Seconds())
	return nil
}

func (txn *einsteindbTxn) rollbackPessimisticLocks() error {
	if txn.lockedCnt == 0 {
		return nil
	}
	bo := NewBackofferWithVars(context.Background(), cleanupMaxBackoff, txn.vars)
	keys := txn.collectLockedKeys()
	return txn.committer.pessimisticRollbackMutations(bo, CommitterMutations{keys: keys})
}

func (txn *einsteindbTxn) collectLockedKeys() [][]byte {
	keys := make([][]byte, 0, txn.lockedCnt)
	buf := txn.GetMemBuffer()
	var err error
	for it := buf.IterWithFlags(nil, nil); it.Valid(); err = it.Next() {
		_ = err
		if it.Flags().HasLocked() {
			keys = append(keys, it.Key())
		}
	}
	return keys
}

func (txn *einsteindbTxn) onCommitted(err error) {
	if txn.commitCallback != nil {
		info := ekv.TxnInfo{StartTS: txn.startTS, CommitTS: txn.commitTS}
		if err != nil {
			info.ErrMsg = err.Error()
		}
		txn.commitCallback(info, err)
	}
}
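
// Commit-flow recap (illustrative summary of Commit above): pessimistic transactions
// and stores without local txnLatches run two-phase commit directly. Otherwise the
// committer first takes in-memory latches on all mutated keys, fails fast with
// ekv.ErrWriteConflictInMilevaDB when the latch is stale, and on success records the
// commitTS back into the latch so later conflicting transactions can observe it.
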
// lockWaitTime is in ms; ekv.LockAlwaysWait (0) means always wait for the dagger, and ekv.LockNowait (-1) means do not wait.
func (txn *einsteindbTxn) LockKeys(ctx context.Context, lockCtx *ekv.LockCtx, keysInput ...ekv.Key) error {
	// Exclude keys that are already locked.
	var err error
	keys := make([][]byte, 0, len(keysInput))
	startTime := time.Now()
	txn.mu.Lock()
	defer txn.mu.Unlock()
	defer func() {
		if err == nil {
			if lockCtx.PessimisticLockWaited != nil {
				if atomic.LoadInt32(lockCtx.PessimisticLockWaited) > 0 {
					timeWaited := time.Since(lockCtx.WaitStartTime)
					atomic.StoreInt64(lockCtx.LockKeysDuration, int64(timeWaited))
					metrics.EinsteinDBPessimisticLockKeysDuration.Observe(timeWaited.Seconds())
				}
			}
		}
		if lockCtx.LockKeysCount != nil {
			*lockCtx.LockKeysCount += int32(len(keys))
		}
		if lockCtx.Stats != nil {
			lockCtx.Stats.TotalTime = time.Since(startTime)
			ctxValue := ctx.Value(execdetails.LockKeysDetailCtxKey)
			if ctxValue != nil {
				lockKeysDetail := ctxValue.(**execdetails.LockKeysDetails)
				*lockKeysDetail = lockCtx.Stats
			}
		}
	}()
	memBuf := txn.us.GetMemBuffer()
	for _, key := range keysInput {
		// The value of lockedMap is only used by pessimistic transactions.
		var valueExist, locked, checkKeyExists bool
		if flags, err := memBuf.GetFlags(key); err == nil {
			locked = flags.HasLocked()
			valueExist = flags.HasLockedValueExists()
			checkKeyExists = flags.HasNeedCheckExists()
		}
		if !locked {
			keys = append(keys, key)
		} else if txn.IsPessimistic() {
			if checkKeyExists && valueExist {
				return txn.committer.extractKeyExistsErr(key)
			}
		}
		if lockCtx.ReturnValues && locked {
			// An already locked key cannot return values; we add an entry to let the caller get the value
			// in other ways.
			lockCtx.Values[string(key)] = ekv.ReturnedValue{AlreadyLocked: true}
		}
	}
	if len(keys) == 0 {
		return nil
	}
	keys = deduplicateKeys(keys)
	if txn.IsPessimistic() && lockCtx.ForUFIDelateTS > 0 {
		if txn.committer == nil {
			// connID is used for logging.
			var connID uint64
			var err error
			val := ctx.Value(stochastikctx.ConnID)
			if val != nil {
				connID = val.(uint64)
			}
			txn.committer, err = newTwoPhaseCommitter(txn, connID)
			if err != nil {
				return err
			}
		}
		var assignedPrimaryKey bool
		if txn.committer.primaryKey == nil {
			txn.committer.primaryKey = keys[0]
			assignedPrimaryKey = true
		}

		lockCtx.Stats = &execdetails.LockKeysDetails{
			LockKeys: int32(len(keys)),
		}
		bo := NewBackofferWithVars(ctx, pessimisticLockMaxBackoff, txn.vars)
		txn.committer.forUFIDelateTS = lockCtx.ForUFIDelateTS
		// If there is more than one key, the keys can be on different regions;
		// executing concurrently on multiple regions may lead to deadlock.
		txn.committer.isFirstLock = txn.lockedCnt == 0 && len(keys) == 1
		err = txn.committer.pessimisticLockMutations(bo, lockCtx, CommitterMutations{keys: keys})
		if bo.totalSleep > 0 {
			atomic.AddInt64(&lockCtx.Stats.BackoffTime, int64(bo.totalSleep)*int64(time.Millisecond))
			lockCtx.Stats.Mu.Lock()
			lockCtx.Stats.Mu.BackoffTypes = append(lockCtx.Stats.Mu.BackoffTypes, bo.types...)
			lockCtx.Stats.Mu.Unlock()
		}
		if lockCtx.Killed != nil {
			// If the kill signal is received while waiting for the pessimistic dagger,
			// pessimisticLockKeys handles the error but does not reset the flag.
			// We need to reset the killed flag here.
			atomic.CompareAndSwapUint32(lockCtx.Killed, 1, 0)
		}
		if err != nil {
			for _, key := range keys {
				if txn.us.HasPresumeKeyNotExists(key) {
					txn.us.UnmarkPresumeKeyNotExists(key)
				}
			}
			keyMayBeLocked := terror.ErrorNotEqual(ekv.ErrWriteConflict, err) && terror.ErrorNotEqual(ekv.ErrKeyExists, err)
			// If there is only one key and the dagger fails, there is no need to do a pessimistic rollback.
			if len(keys) > 1 || keyMayBeLocked {
				wg := txn.asyncPessimisticRollback(ctx, keys)
				if dl, ok := errors.Cause(err).(*ErrDeadlock); ok && hashInKeys(dl.DeadlockKeyHash, keys) {
					dl.IsRetryable = true
					// Wait for the pessimistic rollback to finish before we retry the memex.
					wg.Wait()
					// Sleep a little to let the other transaction blocked by this one acquire the dagger.
					time.Sleep(time.Millisecond * 5)
					failpoint.Inject("SingleStmtDeadLockRetrySleep", func() {
						time.Sleep(300 * time.Millisecond)
					})
				}
			}
			if assignedPrimaryKey {
				// Unset the primary key if we assigned it but failed to dagger it.
				txn.committer.primaryKey = nil
			}
			return err
		}
		if assignedPrimaryKey {
			txn.committer.ttlManager.run(txn.committer, lockCtx)
		}
	}
	for _, key := range keys {
		valExists := ekv.SetKeyLockedValueExists
		// PointGet and BatchPointGet return the value in the pessimistic dagger response, and the value may not exist.
		// For other dagger modes, the locked key values always exist.
		if lockCtx.ReturnValues {
			val := lockCtx.Values[string(key)]
			if len(val.Value) == 0 {
				valExists = ekv.SetKeyLockedValueNotExists
			}
		}
		memBuf.UFIDelateFlags(key, ekv.SetKeyLocked, ekv.DelNeedCheckExists, valExists)
	}
	txn.lockedCnt += len(keys)
	return nil
}
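
// LockKeys usage sketch (illustrative; the lockCtx fields shown are taken from their
// use above, everything else is an assumption): a pessimistic memex acquires its
// daggers roughly like this:
//
//	lockCtx := &ekv.LockCtx{ForUFIDelateTS: forUFIDelateTS, WaitStartTime: time.Now()}
//	if err := txn.LockKeys(ctx, lockCtx, key1, key2); err != nil {
//		// A retryable *ErrDeadlock surfaces here only after the async
//		// pessimistic rollback above has finished.
//	}
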
// deduplicateKeys deduplicates the keys. It uses sort instead of a map to avoid memory allocation.
func deduplicateKeys(keys [][]byte) [][]byte {
	sort.Slice(keys, func(i, j int) bool {
		return bytes.Compare(keys[i], keys[j]) < 0
	})
	deduped := keys[:1]
	for i := 1; i < len(keys); i++ {
		if !bytes.Equal(deduped[len(deduped)-1], keys[i]) {
			deduped = append(deduped, keys[i])
		}
	}
	return deduped
}

func (txn *einsteindbTxn) asyncPessimisticRollback(ctx context.Context, keys [][]byte) *sync.WaitGroup {
	// Clone a new committer for execution in the background.
	committer := &twoPhaseCommitter{
		causetstore:    txn.committer.causetstore,
		connID:         txn.committer.connID,
		startTS:        txn.committer.startTS,
		forUFIDelateTS: txn.committer.forUFIDelateTS,
		primaryKey:     txn.committer.primaryKey,
	}
	wg := new(sync.WaitGroup)
	wg.Add(1)
	go func() {
		failpoint.Inject("AsyncRollBackSleep", func() {
			time.Sleep(100 * time.Millisecond)
		})
		err := committer.pessimisticRollbackMutations(NewBackofferWithVars(ctx, pessimisticRollbackMaxBackoff, txn.vars), CommitterMutations{keys: keys})
		if err != nil {
			logutil.Logger(ctx).Warn("[ekv] pessimisticRollback failed.", zap.Error(err))
		}
		wg.Done()
	}()
	return wg
}
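
// asyncPessimisticRollback note (illustrative): the cloned committer carries only
// the fields needed to send pessimistic rollback requests, so the background
// goroutine does not race with the live committer. The returned WaitGroup lets
// callers pick fire-and-forget or synchronous behavior:
//
//	wg := txn.asyncPessimisticRollback(ctx, keys)
//	wg.Wait() // only needed when completion must be observed, e.g. before a deadlock retry
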
func hashInKeys(deadlockKeyHash uint64, keys [][]byte) bool {
	for _, key := range keys {
		if farm.Fingerprint64(key) == deadlockKeyHash {
			return true
		}
	}
	return false
}

func (txn *einsteindbTxn) IsReadOnly() bool {
	return !txn.us.GetMemBuffer().Dirty()
}

func (txn *einsteindbTxn) StartTS() uint64 {
	return txn.startTS
}

func (txn *einsteindbTxn) Valid() bool {
	return txn.valid
}

func (txn *einsteindbTxn) Len() int {
	return txn.us.GetMemBuffer().Len()
}

func (txn *einsteindbTxn) Size() int {
	return txn.us.GetMemBuffer().Size()
}

func (txn *einsteindbTxn) Reset() {
	txn.us.GetMemBuffer().Reset()
}

func (txn *einsteindbTxn) GetUnionStore() ekv.UnionStore {
	return txn.us
}

func (txn *einsteindbTxn) GetMemBuffer() ekv.MemBuffer {
	return txn.us.GetMemBuffer()
}

func (txn *einsteindbTxn) GetSnapshot() ekv.Snapshot {
	return txn.snapshot
}
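
// Accessor sketch (illustrative): callers distinguish buffered transaction state
// from snapshot reads through the getters above:
//
//	if !txn.IsReadOnly() {
//		fmt.Printf("txn %d buffers %d keys (%d bytes)\n", txn.StartTS(), txn.Len(), txn.Size())
//	}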