github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/einsteindb/interlock.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package einsteindb

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/cznic/mathutil"
	"github.com/gogo/protobuf/proto"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/ekvproto/pkg/ekvrpcpb"
	"github.com/whtcorpsinc/ekvproto/pkg/interlock"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/fidelpb/go-fidelpb"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/einsteindbrpc"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/errno"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/petri/infosync"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"go.uber.org/zap"
)

var einsteindbTxnRegionsNumHistogramWithCoprocessor = metrics.EinsteinDBTxnRegionsNumHistogram.WithLabelValues("interlock")
var einsteindbTxnRegionsNumHistogramWithBatchCoprocessor = metrics.EinsteinDBTxnRegionsNumHistogram.WithLabelValues("batch_coprocessor")

// CopClient is the interlock client.
type CopClient struct {
	ekv.RequestTypeSupportedChecker
	causetstore     *einsteindbStore
	replicaReadSeed uint32
}

// Send builds the request and gets the interlock iterator response.
func (c *CopClient) Send(ctx context.Context, req *ekv.Request, vars *ekv.Variables) ekv.Response {
	if req.StoreType == ekv.TiFlash && req.BatchCop {
		logutil.BgLogger().Debug("send batch requests")
		return c.sendBatch(ctx, req, vars)
	}
	ctx = context.WithValue(ctx, txnStartKey, req.StartTs)
	bo := NewBackofferWithVars(ctx, copBuildTaskMaxBackoff, vars)
	tasks, err := buildCausetTasks(bo, c.causetstore.regionCache, &copRanges{mid: req.KeyRanges}, req)
	if err != nil {
		return copErrorResponse{err}
	}
	it := &copIterator{
		causetstore:     c.causetstore,
		req:             req,
		concurrency:     req.Concurrency,
		finishCh:        make(chan struct{}),
		vars:            vars,
		memTracker:      req.MemTracker,
		replicaReadSeed: c.replicaReadSeed,
		rpcCancel:       NewRPCanceller(),
	}
	it.minCommitTSPushed.data = make(map[uint64]struct{}, 5)
	it.tasks = tasks
	if it.concurrency > len(tasks) {
		it.concurrency = len(tasks)
	}
	if it.concurrency < 1 {
		// Make sure that there is at least one worker.
		it.concurrency = 1
	}

	if it.req.KeepOrder {
		it.sendRate = newRateLimit(2 * it.concurrency)
	} else {
		it.respChan = make(chan *copResponse, it.concurrency)
		it.sendRate = newRateLimit(it.concurrency)
	}

	if !it.req.Streaming {
		ctx = context.WithValue(ctx, RPCCancellerCtxKey{}, it.rpcCancel)
	}
	it.open(ctx)
	return it
}

// copTask contains a related Region and KeyRange for an ekv.Request.
type copTask struct {
	region RegionVerID
	ranges *copRanges

	respChan  chan *copResponse
	storeAddr string
	cmdType   einsteindbrpc.CmdType
	storeType ekv.StoreType
}

func (r *copTask) String() string {
	return fmt.Sprintf("region(%d %d %d) ranges(%d) causetstore(%s)",
		r.region.id, r.region.confVer, r.region.ver, r.ranges.len(), r.storeAddr)
}

// copRanges is like []ekv.KeyRange, but it may have extra elements at the head and tail.
// It avoids allocating a big slice when building copTasks.
type copRanges struct {
	first *ekv.KeyRange
	mid   []ekv.KeyRange
	last  *ekv.KeyRange
}

func (r *copRanges) String() string {
	var s string
	r.do(func(ran *ekv.KeyRange) {
		s += fmt.Sprintf("[%q, %q]", ran.StartKey, ran.EndKey)
	})
	return s
}

func (r *copRanges) len() int {
	var l int
	if r.first != nil {
		l++
	}
	l += len(r.mid)
	if r.last != nil {
		l++
	}
	return l
}

func (r *copRanges) at(i int) ekv.KeyRange {
	if r.first != nil {
		if i == 0 {
			return *r.first
		}
		i--
	}
	if i < len(r.mid) {
		return r.mid[i]
	}
	return *r.last
}

func (r *copRanges) slice(from, to int) *copRanges {
	var ran copRanges
	if r.first != nil {
		if from == 0 && to > 0 {
			ran.first = r.first
		}
		if from > 0 {
			from--
		}
		if to > 0 {
			to--
		}
	}
	if to <= len(r.mid) {
		ran.mid = r.mid[from:to]
	} else {
		if from <= len(r.mid) {
			ran.mid = r.mid[from:]
		}
		if from < to {
			ran.last = r.last
		}
	}
	return &ran
}

func (r *copRanges) do(f func(ran *ekv.KeyRange)) {
	if r.first != nil {
		f(r.first)
	}
	for _, ran := range r.mid {
		f(&ran)
	}
	if r.last != nil {
		f(r.last)
	}
}

func (r *copRanges) toPBRanges() []*interlock.KeyRange {
	ranges := make([]*interlock.KeyRange, 0, r.len())
	r.do(func(ran *ekv.KeyRange) {
		ranges = append(ranges, &interlock.KeyRange{
			Start: ran.StartKey,
			End:   ran.EndKey,
		})
	})
	return ranges
}

// split splits the ranges into (left, right) by key.
func (r *copRanges) split(key []byte) (*copRanges, *copRanges) {
	n := sort.Search(r.len(), func(i int) bool {
		cur := r.at(i)
		return len(cur.EndKey) == 0 || bytes.Compare(cur.EndKey, key) > 0
	})
	// If a range p contains the key, it is split into 2 parts.
	if n < r.len() {
		p := r.at(n)
		if bytes.Compare(key, p.StartKey) > 0 {
			left := r.slice(0, n)
			left.last = &ekv.KeyRange{StartKey: p.StartKey, EndKey: key}
			right := r.slice(n+1, r.len())
			right.first = &ekv.KeyRange{StartKey: key, EndKey: p.EndKey}
			return left, right
		}
	}
	return r.slice(0, n), r.slice(n, r.len())
}
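
// exampleSplitCopRanges is an illustrative sketch added for documentation; the
// function name is hypothetical and nothing in the package calls it. It shows
// how copRanges.split cuts a range set around a key: given ["a", "c") and
// ["c", "e"), splitting at "b" yields a left half covering ["a", "b") and a
// right half covering ["b", "c") and ["c", "e"), reusing the mid slice where
// possible instead of copying it.
func exampleSplitCopRanges() (left, right *copRanges) {
	r := &copRanges{mid: []ekv.KeyRange{
		{StartKey: []byte("a"), EndKey: []byte("c")},
		{StartKey: []byte("c"), EndKey: []byte("e")},
	}}
	return r.split([]byte("b"))
}
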
// rangesPerTask limits the length of the ranges slice sent in one copTask.
const rangesPerTask = 25000

func buildCausetTasks(bo *Backoffer, cache *RegionCache, ranges *copRanges, req *ekv.Request) ([]*copTask, error) {
	start := time.Now()
	cmdType := einsteindbrpc.CmdCop
	if req.Streaming {
		cmdType = einsteindbrpc.CmdCopStream
	}

	if req.StoreType == ekv.MilevaDB {
		return buildMilevaDBMemCausetTasks(ranges, req)
	}

	rangesLen := ranges.len()
	var tasks []*copTask
	appendTask := func(regionWithRangeInfo *KeyLocation, ranges *copRanges) {
		// EinsteinDB will return a gRPC error if the message is too large, so we need to limit the
		// length of the ranges slice to make sure the message can be sent successfully.
		rLen := ranges.len()
		for i := 0; i < rLen; {
			nextI := mathutil.Min(i+rangesPerTask, rLen)
			tasks = append(tasks, &copTask{
				region: regionWithRangeInfo.Region,
				ranges: ranges.slice(i, nextI),
				// The channel buffer is 2 for handling region splits.
				// In the common case, two region-split tasks will not be blocked.
				respChan:  make(chan *copResponse, 2),
				cmdType:   cmdType,
				storeType: req.StoreType,
			})
			i = nextI
		}
	}

	err := splitRanges(bo, cache, ranges, appendTask)
	if err != nil {
		return nil, errors.Trace(err)
	}

	if req.Desc {
		reverseTasks(tasks)
	}
	if elapsed := time.Since(start); elapsed > time.Millisecond*500 {
		logutil.BgLogger().Warn("buildCausetTasks takes too much time",
			zap.Duration("elapsed", elapsed),
			zap.Int("range len", rangesLen),
			zap.Int("task len", len(tasks)))
	}
	einsteindbTxnRegionsNumHistogramWithCoprocessor.Observe(float64(len(tasks)))
	return tasks, nil
}

func buildMilevaDBMemCausetTasks(ranges *copRanges, req *ekv.Request) ([]*copTask, error) {
	servers, err := infosync.GetAllServerInfo(context.Background())
	if err != nil {
		return nil, err
	}
	cmdType := einsteindbrpc.CmdCop
	if req.Streaming {
		cmdType = einsteindbrpc.CmdCopStream
	}
	tasks := make([]*copTask, 0, len(servers))
	for _, ser := range servers {
		addr := ser.IP + ":" + strconv.FormatUint(uint64(ser.StatusPort), 10)
		tasks = append(tasks, &copTask{
			ranges:    ranges,
			respChan:  make(chan *copResponse, 2),
			cmdType:   cmdType,
			storeType: req.StoreType,
			storeAddr: addr,
		})
	}
	return tasks, nil
}

func splitRanges(bo *Backoffer, cache *RegionCache, ranges *copRanges, fn func(regionWithRangeInfo *KeyLocation, ranges *copRanges)) error {
	for ranges.len() > 0 {
		loc, err := cache.LocateKey(bo, ranges.at(0).StartKey)
		if err != nil {
			return errors.Trace(err)
		}

		// Iterate to the first range that is not completely contained in the region.
		var i int
		for ; i < ranges.len(); i++ {
			r := ranges.at(i)
			if !(loc.Contains(r.EndKey) || bytes.Equal(loc.EndKey, r.EndKey)) {
				break
			}
		}
		// All the remaining ranges belong to the same region.
		if i == ranges.len() {
			fn(loc, ranges)
			break
		}

		r := ranges.at(i)
		if loc.Contains(r.StartKey) {
			// Part of r is not in the region. We need to split it.
			taskRanges := ranges.slice(0, i)
			taskRanges.last = &ekv.KeyRange{
				StartKey: r.StartKey,
				EndKey:   loc.EndKey,
			}
			fn(loc, taskRanges)

			ranges = ranges.slice(i+1, ranges.len())
			ranges.first = &ekv.KeyRange{
				StartKey: loc.EndKey,
				EndKey:   r.EndKey,
			}
		} else {
			// rs[i] is not in the region.
			taskRanges := ranges.slice(0, i)
			fn(loc, taskRanges)
			ranges = ranges.slice(i, ranges.len())
		}
	}

	return nil
}

// SplitRegionRanges gets the split ranges from the fidel region.
func SplitRegionRanges(bo *Backoffer, cache *RegionCache, keyRanges []ekv.KeyRange) ([]ekv.KeyRange, error) {
	ranges := copRanges{mid: keyRanges}

	var ret []ekv.KeyRange
	appendRange := func(regionWithRangeInfo *KeyLocation, ranges *copRanges) {
		for i := 0; i < ranges.len(); i++ {
			ret = append(ret, ranges.at(i))
		}
	}

	err := splitRanges(bo, cache, &ranges, appendRange)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return ret, nil
}

func reverseTasks(tasks []*copTask) {
	for i := 0; i < len(tasks)/2; i++ {
		j := len(tasks) - i - 1
		tasks[i], tasks[j] = tasks[j], tasks[i]
	}
}

type copIterator struct {
	causetstore *einsteindbStore
	req         *ekv.Request
	concurrency int
	finishCh    chan struct{}

	// If KeepOrder is true, results are stored in copTask.respChan and read out one by one.
	tasks []*copTask
	curr  int

	// sendRate controls the sending rate of copIteratorTaskSender.
	sendRate *rateLimit

	// Otherwise, results are stored in respChan.
	respChan chan *copResponse

	vars *ekv.Variables

	memTracker *memory.Tracker

	replicaReadSeed uint32

	rpcCancel *RPCCanceller

	wg sync.WaitGroup
	// closed represents whether Close has been called.
	// There are two cases in which we need to close the `finishCh` channel: when the context is done,
	// and when Close is called. We use atomic.CompareAndSwap on `closed` to make sure the channel is
	// not closed twice.
	closed uint32

	minCommitTSPushed
}

// copIteratorWorker receives tasks from copIteratorTaskSender, handles them and sends the copResponse to respChan.
type copIteratorWorker struct {
	taskCh      <-chan *copTask
	wg          *sync.WaitGroup
	causetstore *einsteindbStore
	req         *ekv.Request
	respChan    chan<- *copResponse
	finishCh    <-chan struct{}
	vars        *ekv.Variables
	clientHelper

	memTracker *memory.Tracker

	replicaReadSeed uint32

	sendRate *rateLimit
}

// copIteratorTaskSender sends tasks to taskCh, then waits for the workers to exit.
type copIteratorTaskSender struct {
	taskCh   chan<- *copTask
	wg       *sync.WaitGroup
	tasks    []*copTask
	finishCh <-chan struct{}
	respChan chan<- *copResponse
	sendRate *rateLimit
}

type copResponse struct {
	pbResp   *interlock.Response
	detail   *CopRuntimeStats
	startKey ekv.Key
	err      error
	respSize int64
	respTime time.Duration
}

const (
	sizeofInterDircDetails = int(unsafe.Sizeof(execdetails.InterDircDetails{}))
	sizeofCommitDetails    = int(unsafe.Sizeof(execdetails.CommitDetails{}))
)

// GetData implements the ekv.ResultSubset GetData interface.
func (rs *copResponse) GetData() []byte {
	return rs.pbResp.Data
}
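
// exampleDrainCopResults is an illustrative sketch added for documentation; the
// function name is hypothetical and nothing in the package calls it. It shows
// the assumed caller pattern for the ekv.Response produced by CopClient.Send:
// each ResultSubset carries one interlock data chunk via GetData, and a nil
// ResultSubset marks the end of the result stream.
func exampleDrainCopResults(ctx context.Context, resp ekv.Response) ([][]byte, error) {
	var chunks [][]byte
	for {
		subset, err := resp.Next(ctx)
		if err != nil {
			return nil, err
		}
		if subset == nil {
			// Finished: Next returns nil to indicate there is no more data.
			return chunks, nil
		}
		chunks = append(chunks, subset.GetData())
	}
}
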
// GetStartKey implements the ekv.ResultSubset GetStartKey interface.
func (rs *copResponse) GetStartKey() ekv.Key {
	return rs.startKey
}

func (rs *copResponse) GetCopRuntimeStats() *CopRuntimeStats {
	return rs.detail
}

// MemSize returns how many bytes of memory this response uses.
func (rs *copResponse) MemSize() int64 {
	if rs.respSize != 0 {
		return rs.respSize
	}

	// ignore rs.err
	rs.respSize += int64(cap(rs.startKey))
	if rs.detail != nil {
		rs.respSize += int64(sizeofInterDircDetails)
	}
	if rs.pbResp != nil {
		// Use an approximate size since it's hard to get an accurate value.
		rs.respSize += int64(rs.pbResp.Size())
	}
	return rs.respSize
}

func (rs *copResponse) RespTime() time.Duration {
	return rs.respTime
}

const minLogCopTaskTime = 300 * time.Millisecond

// run is a worker function that gets a copTask from the channel, handles it and
// sends the result back.
func (worker *copIteratorWorker) run(ctx context.Context) {
	defer worker.wg.Done()
	for task := range worker.taskCh {
		respCh := worker.respChan
		if respCh == nil {
			respCh = task.respChan
		}

		worker.handleTask(ctx, task, respCh)
		close(task.respChan)
		if worker.respChan != nil {
			worker.sendRate.putToken()
		}
		if worker.vars != nil && worker.vars.Killed != nil && atomic.LoadUint32(worker.vars.Killed) == 1 {
			return
		}
		select {
		case <-worker.finishCh:
			return
		default:
		}
	}
}

// open starts the worker and sender goroutines.
func (it *copIterator) open(ctx context.Context) {
	taskCh := make(chan *copTask, 1)
	it.wg.Add(it.concurrency)
	// Start it.concurrency workers to handle cop requests.
	for i := 0; i < it.concurrency; i++ {
		worker := &copIteratorWorker{
			taskCh:      taskCh,
			wg:          &it.wg,
			causetstore: it.causetstore,
			req:         it.req,
			respChan:    it.respChan,
			finishCh:    it.finishCh,
			vars:        it.vars,
			clientHelper: clientHelper{
				LockResolver:      it.causetstore.lockResolver,
				RegionCache:       it.causetstore.regionCache,
				minCommitTSPushed: &it.minCommitTSPushed,
				Client:            it.causetstore.client,
			},

			memTracker: it.memTracker,

			replicaReadSeed: it.replicaReadSeed,
			sendRate:        it.sendRate,
		}
		go worker.run(ctx)
	}
	taskSender := &copIteratorTaskSender{
		taskCh:   taskCh,
		wg:       &it.wg,
		tasks:    it.tasks,
		finishCh: it.finishCh,
		sendRate: it.sendRate,
	}
	taskSender.respChan = it.respChan
	go taskSender.run()
}

func (sender *copIteratorTaskSender) run() {
	// Send tasks to feed the worker goroutines.
	for _, t := range sender.tasks {
		// We control the sending rate to prevent all tasks from being done
		// (i.e. all of the responses being buffered) by copIteratorWorker.
		// We keep the number of in-flight tasks within 2 * concurrency when KeepOrder is true.
		// If KeepOrder is false, the number equals the concurrency.
		// One more task is sent whenever a task has been finished in copIterator.Next.
		exit := sender.sendRate.getToken(sender.finishCh)
		if exit {
			break
		}
		exit = sender.sendToTaskCh(t)
		if exit {
			break
		}
	}
	close(sender.taskCh)

	// Wait for the worker goroutines to exit.
	sender.wg.Wait()
	if sender.respChan != nil {
		close(sender.respChan)
	}
}

func (it *copIterator) recvFromRespCh(ctx context.Context, respCh <-chan *copResponse) (resp *copResponse, ok bool, exit bool) {
	ticker := time.NewTicker(3 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case resp, ok = <-respCh:
			if it.memTracker != nil && resp != nil {
				it.memTracker.Consume(-resp.MemSize())
			}
			return
		case <-it.finishCh:
			exit = true
			return
		case <-ticker.C:
			if atomic.LoadUint32(it.vars.Killed) == 1 {
				resp = &copResponse{err: ErrQueryInterrupted}
				ok = true
				return
			}
		case <-ctx.Done():
			// We select ctx.Done() in the thread of `Next` instead of in the worker to avoid the cost of `WithCancel`.
			if atomic.CompareAndSwapUint32(&it.closed, 0, 1) {
				close(it.finishCh)
			}
			exit = true
			return
		}
	}
}

func (sender *copIteratorTaskSender) sendToTaskCh(t *copTask) (exit bool) {
	select {
	case sender.taskCh <- t:
	case <-sender.finishCh:
		exit = true
	}
	return
}

func (worker *copIteratorWorker) sendToRespCh(resp *copResponse, respCh chan<- *copResponse, checkOOM bool) (exit bool) {
	if worker.memTracker != nil && checkOOM {
		worker.memTracker.Consume(resp.MemSize())
	}
	select {
	case respCh <- resp:
	case <-worker.finishCh:
		exit = true
	}
	return
}

// Next returns the next interlock result.
// NOTE: Use nil to indicate finish, so if the returned ResultSubset is not nil, the reader should continue to call Next().
func (it *copIterator) Next(ctx context.Context) (ekv.ResultSubset, error) {
	var (
		resp   *copResponse
		ok     bool
		closed bool
	)
	// If data order matters, responses should be returned in the same order as the copTask slice.
	// Otherwise all responses are returned from a single channel.
	if it.respChan != nil {
		// Get the next fetched resp from the channel.
		resp, ok, closed = it.recvFromRespCh(ctx, it.respChan)
		if !ok || closed {
			return nil, nil
		}
	} else {
		for {
			if it.curr >= len(it.tasks) {
				// Resp will be nil if the iterator is finished.
				return nil, nil
			}
			task := it.tasks[it.curr]
			resp, ok, closed = it.recvFromRespCh(ctx, task.respChan)
			if closed {
				// Close() is already called, so Next() is invalid.
				return nil, nil
			}
			if ok {
				break
			}
			// Switch to the next task.
			it.tasks[it.curr] = nil
			it.curr++
			it.sendRate.putToken()
		}
	}

	if resp.err != nil {
		return nil, errors.Trace(resp.err)
	}

	err := it.causetstore.CheckVisibility(it.req.StartTs)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return resp, nil
}

// chooseBackoffer associates each region with an independent backoffer. In this way, when multiple
// regions are unavailable, MilevaDB can execute very quickly without blocking.
func chooseBackoffer(ctx context.Context, backoffermap map[uint64]*Backoffer, task *copTask, worker *copIteratorWorker) *Backoffer {
	bo, ok := backoffermap[task.region.id]
	if ok {
		return bo
	}
	newbo := NewBackofferWithVars(ctx, copNextMaxBackoff, worker.vars)
	backoffermap[task.region.id] = newbo
	return newbo
}
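
// exampleChooseBackoffer is an illustrative sketch added for documentation; the
// function name is hypothetical and nothing in the package calls it. Assuming
// taskA and taskB target different regions, it shows that chooseBackoffer hands
// out one Backoffer per region, so retries against different regions do not
// share (and therefore do not inflate) each other's sleep budget.
func exampleChooseBackoffer(ctx context.Context, worker *copIteratorWorker, taskA, taskB *copTask) {
	backoffermap := make(map[uint64]*Backoffer)
	boA := chooseBackoffer(ctx, backoffermap, taskA, worker)
	boB := chooseBackoffer(ctx, backoffermap, taskB, worker)
	// Asking again for taskA's region returns the same Backoffer instance,
	// while taskB's Backoffer stays independent of it.
	if boA == chooseBackoffer(ctx, backoffermap, taskA, worker) && boA != boB {
		logutil.BgLogger().Debug("per-region backoffers are independent")
	}
}
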
// handleTask handles a single copTask, sends the result to the channel and retries automatically on error.
func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, respCh chan<- *copResponse) {
	defer func() {
		r := recover()
		if r != nil {
			logutil.BgLogger().Error("copIteratorWork meet panic",
				zap.Reflect("r", r),
				zap.Stack("stack trace"))
			resp := &copResponse{err: errors.Errorf("%v", r)}
			// If a panic has happened, set checkOOM to false to avoid another panic.
			worker.sendToRespCh(resp, respCh, false)
		}
	}()
	remainTasks := []*copTask{task}
	backoffermap := make(map[uint64]*Backoffer)
	for len(remainTasks) > 0 {
		curTask := remainTasks[0]
		bo := chooseBackoffer(ctx, backoffermap, curTask, worker)
		tasks, err := worker.handleTaskOnce(bo, curTask, respCh)
		if err != nil {
			resp := &copResponse{err: errors.Trace(err)}
			worker.sendToRespCh(resp, respCh, true)
			return
		}
		// Test whether the ctx is cancelled.
		if bo.vars != nil && bo.vars.Killed != nil && atomic.LoadUint32(bo.vars.Killed) == 1 {
			return
		}

		if len(tasks) > 0 {
			remainTasks = append(tasks, remainTasks[1:]...)
		} else {
			remainTasks = remainTasks[1:]
		}
	}
}

// handleTaskOnce handles a single copTask; successful results are sent to the channel.
// If an error happens, it returns the error. If the region splits or a dagger is met, it returns the remaining tasks.
func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch chan<- *copResponse) ([]*copTask, error) {
	failpoint.Inject("handleTaskOnceError", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(nil, errors.New("mock handleTaskOnce error"))
		}
	})

	copReq := interlock.Request{
		Tp:        worker.req.Tp,
		StartTs:   worker.req.StartTs,
		Data:      worker.req.Data,
		Ranges:    task.ranges.toPBRanges(),
		SchemaVer: worker.req.SchemaVar,
	}

	var cacheKey []byte = nil
	var cacheValue *coprCacheValue = nil

	// If there are many ranges, it is very likely to be a TableLookupRequest. Such requests are not worth
	// caching since computation is not the main cost. Ignore them directly to avoid slowly building the cache key.
	if task.cmdType == einsteindbrpc.CmdCop && worker.causetstore.coprCache != nil && worker.req.Cacheable && len(copReq.Ranges) < 10 {
		cKey, err := coprCacheBuildKey(&copReq)
		if err == nil {
			cacheKey = cKey
			cValue := worker.causetstore.coprCache.Get(cKey)
			copReq.IsCacheEnabled = true
			if cValue != nil && cValue.RegionID == task.region.id && cValue.TimeStamp <= worker.req.StartTs {
				// Append the cache version to the request to skip Coprocessor computation if possible
				// when the request result is cached.
				copReq.CacheIfMatchVersion = cValue.RegionDataVersion
				cacheValue = cValue
			} else {
				copReq.CacheIfMatchVersion = 0
			}
		} else {
			logutil.BgLogger().Warn("Failed to build copr cache key", zap.Error(err))
		}
	}

	req := einsteindbrpc.NewReplicaReadRequest(task.cmdType, &copReq, worker.req.ReplicaRead, &worker.replicaReadSeed, ekvrpcpb.Context{
		IsolationLevel: pbIsolationLevel(worker.req.IsolationLevel),
		Priority:       ekvPriorityToCommandPri(worker.req.Priority),
		NotFillCache:   worker.req.NotFillCache,
		HandleTime:     true,
		ScanDetail:     true,
		TaskId:         worker.req.TaskID,
	})
	req.StoreTp = task.storeType
	startTime := time.Now()
	if worker.Stats == nil {
		worker.Stats = make(map[einsteindbrpc.CmdType]*RPCRuntimeStats)
	}
	resp, rpcCtx, storeAddr, err := worker.SendReqCtx(bo, req, task.region, ReadTimeoutMedium, task.storeType, task.storeAddr)
	if err != nil {
		if task.storeType == ekv.MilevaDB {
			err = worker.handleMilevaDBSendReqErr(err, task, ch)
			return nil, err
		}
		return nil, errors.Trace(err)
	}

	// Set the task.storeAddr field so that its task.String() method has the causetstore address information.
	task.storeAddr = storeAddr
	costTime := time.Since(startTime)
	if costTime > minLogCopTaskTime {
		worker.logTimeCopTask(costTime, task, bo, resp)
	}
	metrics.EinsteinDBCoprocessorHistogram.Observe(costTime.Seconds())

	if task.cmdType == einsteindbrpc.CmdCopStream {
		return worker.handleCopStreamResult(bo, rpcCtx, resp.Resp.(*einsteindbrpc.CopStreamResponse), task, ch, costTime)
	}

	// Handle the response for a non-streaming copTask.
	return worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: resp.Resp.(*interlock.Response)}, cacheKey, cacheValue, task, ch, nil, costTime)
}

type minCommitTSPushed struct {
	data map[uint64]struct{}
	sync.RWMutex
}

func (m *minCommitTSPushed) UFIDelate(from []uint64) {
	m.Lock()
	for _, v := range from {
		m.data[v] = struct{}{}
	}
	m.Unlock()
}

func (m *minCommitTSPushed) Get() []uint64 {
	m.RLock()
	defer m.RUnlock()
	if len(m.data) == 0 {
		return nil
	}

	ret := make([]uint64, 0, len(m.data))
	for k := range m.data {
		ret = append(ret, k)
	}
	return ret
}
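
// examplePushedTS is an illustrative sketch added for documentation; the
// function name is hypothetical and nothing in the package calls it.
// minCommitTSPushed is a small thread-safe set of transaction start
// timestamps: workers record the transactions whose daggers they resolved via
// UFIDelate, and later requests attach the set through Get (see
// clientHelper.SendReqCtx below).
func examplePushedTS() []uint64 {
	pushed := minCommitTSPushed{data: make(map[uint64]struct{}, 5)}
	pushed.UFIDelate([]uint64{401, 402})
	return pushed.Get() // e.g. [401 402], in arbitrary order
}
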
// clientHelper wraps LockResolver and RegionRequestSender.
// It's introduced to support the new dagger resolving pattern in the large transaction.
// In the large transaction protocol, sending requests and resolving locks are
// context-dependent. For example, when a sent request meets a secondary dagger, we'll
// call ResolveLock, and if the dagger belongs to a large transaction, we may retry
// the request. If there is no context information about the resolved locks, we'll
// meet the secondary dagger again and run into a dead loop.
type clientHelper struct {
	*LockResolver
	*RegionCache
	*minCommitTSPushed
	Client
	resolveLite bool
	RegionRequestRuntimeStats
}

// ResolveLocks wraps the ResolveLocks function and stores the resolved result.
func (ch *clientHelper) ResolveLocks(bo *Backoffer, callerStartTS uint64, locks []*Lock) (int64, error) {
	var err error
	var resolvedLocks []uint64
	var msBeforeTxnExpired int64
	if ch.Stats != nil {
		defer func(start time.Time) {
			recordRegionRequestRuntimeStats(ch.Stats, einsteindbrpc.CmdResolveLock, time.Since(start))
		}(time.Now())
	}
	if ch.resolveLite {
		msBeforeTxnExpired, resolvedLocks, err = ch.LockResolver.resolveLocksLite(bo, callerStartTS, locks)
	} else {
		msBeforeTxnExpired, resolvedLocks, err = ch.LockResolver.ResolveLocks(bo, callerStartTS, locks)
	}
	if err != nil {
		return msBeforeTxnExpired, err
	}
	if len(resolvedLocks) > 0 {
		ch.minCommitTSPushed.UFIDelate(resolvedLocks)
		return 0, nil
	}
	return msBeforeTxnExpired, nil
}

// SendReqCtx wraps the SendReqCtx function and uses the resolved dagger result in the ekvrpcpb.Context.
func (ch *clientHelper) SendReqCtx(bo *Backoffer, req *einsteindbrpc.Request, regionID RegionVerID, timeout time.Duration, sType ekv.StoreType, directStoreAddr string) (*einsteindbrpc.Response, *RPCContext, string, error) {
	sender := NewRegionRequestSender(ch.RegionCache, ch.Client)
	if len(directStoreAddr) > 0 {
		sender.storeAddr = directStoreAddr
	}
	sender.Stats = ch.Stats
	req.Context.ResolvedLocks = ch.minCommitTSPushed.Get()
	resp, ctx, err := sender.SendReqCtx(bo, req, regionID, timeout, sType)
	return resp, ctx, sender.storeAddr, err
}

const (
	minLogBackoffTime   = 100
	minLogKVProcessTime = 100
	minLogKVWaitTime    = 200
)

func (worker *copIteratorWorker) logTimeCopTask(costTime time.Duration, task *copTask, bo *Backoffer, resp *einsteindbrpc.Response) {
	logStr := fmt.Sprintf("[TIME_COP_PROCESS] resp_time:%s txnStartTS:%d region_id:%d store_addr:%s", costTime, worker.req.StartTs, task.region.id, task.storeAddr)
	if bo.totalSleep > minLogBackoffTime {
		backoffTypes := strings.Replace(fmt.Sprintf("%v", bo.types), " ", ",", -1)
		logStr += fmt.Sprintf(" backoff_ms:%d backoff_types:%s", bo.totalSleep, backoffTypes)
	}
	var detail *ekvrpcpb.InterDircDetails
	if resp.Resp != nil {
		switch r := resp.Resp.(type) {
		case *interlock.Response:
			detail = r.InterDircDetails
		case *einsteindbrpc.CopStreamResponse:
			// A streaming request returns io.EOF, so the first CopStreamResponse.Response may be nil.
			if r.Response != nil {
				detail = r.Response.InterDircDetails
			}
		default:
			panic("unreachable")
		}
	}

	if detail != nil && detail.HandleTime != nil {
		processMs := detail.HandleTime.ProcessMs
		waitMs := detail.HandleTime.WaitMs
		if processMs > minLogKVProcessTime {
			logStr += fmt.Sprintf(" ekv_process_ms:%d", processMs)
			if detail.ScanDetail != nil {
				logStr = appendScanDetail(logStr, "write", detail.ScanDetail.Write)
				logStr = appendScanDetail(logStr, "data", detail.ScanDetail.Data)
				logStr = appendScanDetail(logStr, "dagger", detail.ScanDetail.Lock)
			}
		}
		if waitMs > minLogKVWaitTime {
			logStr += fmt.Sprintf(" ekv_wait_ms:%d", waitMs)
			if processMs <= minLogKVProcessTime {
				logStr = strings.Replace(logStr, "TIME_COP_PROCESS", "TIME_COP_WAIT", 1)
			}
		}
	}
	logutil.Logger(bo.ctx).Info(logStr)
}

func appendScanDetail(logStr string, columnFamily string, scanInfo *ekvrpcpb.ScanInfo) string {
	if scanInfo != nil {
		logStr += fmt.Sprintf(" scan_total_%s:%d", columnFamily, scanInfo.Total)
		logStr += fmt.Sprintf(" scan_processed_%s:%d", columnFamily, scanInfo.Processed)
	}
	return logStr
}

func (worker *copIteratorWorker) handleCopStreamResult(bo *Backoffer, rpcCtx *RPCContext, stream *einsteindbrpc.CopStreamResponse, task *copTask, ch chan<- *copResponse, costTime time.Duration) ([]*copTask, error) {
	defer stream.Close()
	var resp *interlock.Response
	var lastRange *interlock.KeyRange
	resp = stream.Response
	if resp == nil {
		// The streaming request returns io.EOF, so the first Response is nil.
		return nil, nil
	}
	for {
		remainedTasks, err := worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: resp}, nil, nil, task, ch, lastRange, costTime)
		if err != nil || len(remainedTasks) != 0 {
			return remainedTasks, errors.Trace(err)
		}
		resp, err = stream.Recv()
		if err != nil {
			if errors.Cause(err) == io.EOF {
				return nil, nil
			}

			if err1 := bo.Backoff(boEinsteinDBRPC, errors.Errorf("recv stream response error: %v, task: %s", err, task)); err1 != nil {
				return nil, errors.Trace(err)
			}

			// There is no interlock.Response for a network error, so rebuild the task based on the last successful one.
			if errors.Cause(err) == context.Canceled {
				logutil.BgLogger().Info("stream recv timeout", zap.Error(err))
			} else {
				logutil.BgLogger().Info("stream unknown error", zap.Error(err))
			}
			return worker.buildCausetTasksFromRemain(bo, lastRange, task)
		}
		if resp.Range != nil {
			lastRange = resp.Range
		}
	}
}
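
// exampleRecvCopStream is an illustrative sketch added for documentation; the
// function name is hypothetical and nothing in the package calls it. It shows
// the consumption pattern implemented by handleCopStreamResult above: a
// CopStreamResponse is drained until Recv returns io.EOF, and on any other
// error the caller backs off and rebuilds tasks starting from the last
// successfully received range.
func exampleRecvCopStream(stream *einsteindbrpc.CopStreamResponse) (lastRange *interlock.KeyRange, err error) {
	defer stream.Close()
	for {
		resp, err := stream.Recv()
		if err != nil {
			if errors.Cause(err) == io.EOF {
				return lastRange, nil // the stream finished normally
			}
			return lastRange, err // caller backs off, then retries from lastRange
		}
		if resp.Range != nil {
			lastRange = resp.Range
		}
	}
}
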
// handleCopResponse checks the interlock Response for region splits and daggers, and
// returns more tasks when that happens, or handles the response if there is no error.
// If we're handling a streaming interlock response, lastRange is the range of the last
// successful response; otherwise it's nil.
func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, ch chan<- *copResponse, lastRange *interlock.KeyRange, costTime time.Duration) ([]*copTask, error) {
	if regionErr := resp.pbResp.GetRegionError(); regionErr != nil {
		if rpcCtx != nil && task.storeType == ekv.MilevaDB {
			resp.err = errors.Errorf("error: %v", regionErr)
			worker.sendToRespCh(resp, ch, true)
			return nil, nil
		}
		errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s",
			task.region.id, task.region.ver, task.storeType.Name(), task.storeAddr, regionErr.String())
		if err := bo.Backoff(BoRegionMiss, errors.New(errStr)); err != nil {
			return nil, errors.Trace(err)
		}
		// We may meet a RegionError at the first packet, but not while visiting the stream.
		return buildCausetTasks(bo, worker.causetstore.regionCache, task.ranges, worker.req)
	}
	if lockErr := resp.pbResp.GetLocked(); lockErr != nil {
		logutil.BgLogger().Debug("interlock encounters",
			zap.Stringer("dagger", lockErr))
		msBeforeExpired, err1 := worker.ResolveLocks(bo, worker.req.StartTs, []*Lock{NewLock(lockErr)})
		if err1 != nil {
			return nil, errors.Trace(err1)
		}
		if msBeforeExpired > 0 {
			if err := bo.BackoffWithMaxSleep(boTxnLockFast, int(msBeforeExpired), errors.New(lockErr.String())); err != nil {
				return nil, errors.Trace(err)
			}
		}
		return worker.buildCausetTasksFromRemain(bo, lastRange, task)
	}
	if otherErr := resp.pbResp.GetOtherError(); otherErr != "" {
		err := errors.Errorf("other error: %s", otherErr)
		logutil.BgLogger().Warn("other error",
			zap.Uint64("txnStartTS", worker.req.StartTs),
			zap.Uint64("regionID", task.region.id),
			zap.String("storeAddr", task.storeAddr),
			zap.Error(err))
		return nil, errors.Trace(err)
	}
	// When the request is using the streaming API, the `Range` is not nil.
	if resp.pbResp.Range != nil {
		resp.startKey = resp.pbResp.Range.Start
	} else if task.ranges != nil && task.ranges.len() > 0 {
		resp.startKey = task.ranges.at(0).StartKey
	}
	if resp.detail == nil {
		resp.detail = new(CopRuntimeStats)
	}
	resp.detail.Stats = worker.Stats
	worker.Stats = nil
	resp.detail.BackoffTime = time.Duration(bo.totalSleep) * time.Millisecond
	resp.detail.BackoffSleep = make(map[string]time.Duration, len(bo.backoffTimes))
	resp.detail.BackoffTimes = make(map[string]int, len(bo.backoffTimes))
	for backoff := range bo.backoffTimes {
		backoffName := backoff.String()
		resp.detail.BackoffTimes[backoffName] = bo.backoffTimes[backoff]
		resp.detail.BackoffSleep[backoffName] = time.Duration(bo.backoffSleepMS[backoff]) * time.Millisecond
	}
	if rpcCtx != nil {
		resp.detail.CalleeAddress = rpcCtx.Addr
	}
	resp.respTime = costTime
	if pbDetails := resp.pbResp.InterDircDetails; pbDetails != nil {
		if handleTime := pbDetails.HandleTime; handleTime != nil {
			resp.detail.WaitTime = time.Duration(handleTime.WaitMs) * time.Millisecond
			resp.detail.ProcessTime = time.Duration(handleTime.ProcessMs) * time.Millisecond
		}
		if scanDetail := pbDetails.ScanDetail; scanDetail != nil {
			if scanDetail.Write != nil {
				resp.detail.TotalKeys += scanDetail.Write.Total
				resp.detail.ProcessedKeys += scanDetail.Write.Processed
			}
		}
	}
	if resp.pbResp.IsCacheHit {
		if cacheValue == nil {
			return nil, errors.New("Internal error: received illegal EinsteinDB response")
		}
		// The cache is hit and valid: use the cached data as the response data and don't uFIDelate the cache.
		data := make([]byte, len(cacheValue.Data))
		copy(data, cacheValue.Data)
		resp.pbResp.Data = data
		resp.detail.CoprCacheHit = true
	} else {
		// The cache is not hit, or the hit is not valid: uFIDelate the cache if the response can be cached.
		if cacheKey != nil && resp.pbResp.CanBeCached && resp.pbResp.CacheLastVersion > 0 {
			if worker.causetstore.coprCache.CheckAdmission(resp.pbResp.Data.Size(), resp.detail.ProcessTime) {
				data := make([]byte, len(resp.pbResp.Data))
				copy(data, resp.pbResp.Data)

				newCacheValue := coprCacheValue{
					Data:              data,
					TimeStamp:         worker.req.StartTs,
					RegionID:          task.region.id,
					RegionDataVersion: resp.pbResp.CacheLastVersion,
				}
				worker.causetstore.coprCache.Set(cacheKey, &newCacheValue)
			}
		}
	}
	worker.sendToRespCh(resp, ch, true)
	return nil, nil
}
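
// exampleCoprCacheReuse is an illustrative sketch added for documentation; the
// function name is hypothetical and nothing in the package calls it. It shows
// the interlock cache handshake handled above: the request carries the cached
// region data version, and when EinsteinDB reports a cache hit the locally
// cached data is copied into the response instead of being re-read from the
// causetstore.
func exampleCoprCacheReuse(copReq *interlock.Request, resp *interlock.Response, cached *coprCacheValue) {
	copReq.IsCacheEnabled = true
	copReq.CacheIfMatchVersion = cached.RegionDataVersion
	// ... the request is sent; later, when the response arrives:
	if resp.IsCacheHit {
		data := make([]byte, len(cached.Data))
		copy(data, cached.Data)
		resp.Data = data
	}
}
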
// CopRuntimeStats contains execution detail information.
type CopRuntimeStats struct {
	execdetails.InterDircDetails
	RegionRequestRuntimeStats

	CoprCacheHit bool
}

func (worker *copIteratorWorker) handleMilevaDBSendReqErr(err error, task *copTask, ch chan<- *copResponse) error {
	errCode := errno.ErrUnknown
	errMsg := err.Error()
	if terror.ErrorEqual(err, ErrEinsteinDBServerTimeout) {
		errCode = errno.ErrEinsteinDBServerTimeout
		errMsg = "MilevaDB server timeout, address is " + task.storeAddr
	}
	selResp := fidelpb.SelectResponse{
		Warnings: []*fidelpb.Error{
			{
				Code: int32(errCode),
				Msg:  errMsg,
			},
		},
	}
	data, err := proto.Marshal(&selResp)
	if err != nil {
		return errors.Trace(err)
	}
	resp := &copResponse{
		pbResp: &interlock.Response{
			Data: data,
		},
		detail: &CopRuntimeStats{},
	}
	worker.sendToRespCh(resp, ch, true)
	return nil
}

func (worker *copIteratorWorker) buildCausetTasksFromRemain(bo *Backoffer, lastRange *interlock.KeyRange, task *copTask) ([]*copTask, error) {
	remainedRanges := task.ranges
	if worker.req.Streaming && lastRange != nil {
		remainedRanges = worker.calculateRemain(task.ranges, lastRange, worker.req.Desc)
	}
	return buildCausetTasks(bo, worker.causetstore.regionCache, remainedRanges, worker.req)
}

// calculateRemain splits the input ranges into two parts and takes one of them according to the desc flag.
// It's used in the streaming API to calculate which ranges have been consumed and which need to be retried.
// For example:
// ranges: [r1 --> r2) [r3 --> r4)
// split:  [s1 --> s2)
// In normal scan order, all data before s1 is consumed, so the remaining ranges are [s1 --> r2) [r3 --> r4).
// In reverse scan order, all data after s2 is consumed, so the remaining ranges are [r1 --> r2) [r3 --> s2).
func (worker *copIteratorWorker) calculateRemain(ranges *copRanges, split *interlock.KeyRange, desc bool) *copRanges {
	if desc {
		left, _ := ranges.split(split.End)
		return left
	}
	_, right := ranges.split(split.Start)
	return right
}

func (it *copIterator) Close() error {
	if atomic.CompareAndSwapUint32(&it.closed, 0, 1) {
		close(it.finishCh)
	}
	it.rpcCancel.CancelAll()
	it.wg.Wait()
	return nil
}

type rateLimit struct {
	token chan struct{}
}

func newRateLimit(n int) *rateLimit {
	return &rateLimit{
		token: make(chan struct{}, n),
	}
}

func (r *rateLimit) getToken(done <-chan struct{}) (exit bool) {
	select {
	case <-done:
		return true
	case r.token <- struct{}{}:
		return false
	}
}

func (r *rateLimit) putToken() {
	select {
	case <-r.token:
	default:
		panic("put a redundant token")
	}
}

// copErrorResponse returns an error when Next() is called.
type copErrorResponse struct{ error }

func (it copErrorResponse) Next(ctx context.Context) (ekv.ResultSubset, error) {
	return nil, it.error
}

func (it copErrorResponse) Close() error {
	return nil
}
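
// exampleRateLimit is an illustrative sketch added for documentation; the
// function name is hypothetical and nothing in the package calls it. rateLimit
// is a counting semaphore built on a buffered channel: getToken blocks once n
// tokens are outstanding (unless done is closed), and putToken releases a slot.
func exampleRateLimit() {
	limiter := newRateLimit(2)
	done := make(chan struct{})
	_ = limiter.getToken(done) // token acquired, returns false
	_ = limiter.getToken(done) // token acquired, returns false
	// A third getToken would block here until putToken is called or done is closed.
	limiter.putToken()
	close(done)
}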