github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/einsteindb/batch_coprocessor.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package einsteindb

import (
	"context"
	"io"
	"sync"
	"sync/atomic"
	"time"

	"github.com/whtcorpsinc/ekvproto/pkg/ekvrpcpb"
	"github.com/whtcorpsinc/ekvproto/pkg/interlock"
	"github.com/whtcorpsinc/ekvproto/pkg/spacetimepb"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/einsteindbrpc"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"go.uber.org/zap"
)

// batchCopTask comprises multiple copTasks that will be sent to the same causetstore.
type batchCopTask struct {
	storeAddr string
	cmdType   einsteindbrpc.CmdType

	CausetTasks []copTaskAndRPCContext
}

type batchCopResponse struct {
	pbResp *interlock.BatchResponse
	detail *CopRuntimeStats

	// A batch Causet response does not return a startKey yet, so a batch cop task cannot be retried partially.
	startKey ekv.Key
	err      error
	respSize int64
	respTime time.Duration
}

// GetData implements the ekv.ResultSubset GetData interface.
func (rs *batchCopResponse) GetData() []byte {
	return rs.pbResp.Data
}

// GetStartKey implements the ekv.ResultSubset GetStartKey interface.
func (rs *batchCopResponse) GetStartKey() ekv.Key {
	return rs.startKey
}

// GetCopRuntimeStats is unavailable currently, because TiFlash has not collected exec details for batch cop.
// TODO: Will fix in near future.
func (rs *batchCopResponse) GetCopRuntimeStats() *CopRuntimeStats {
	return rs.detail
}

// MemSize returns how many bytes of memory this response uses.
func (rs *batchCopResponse) MemSize() int64 {
	if rs.respSize != 0 {
		return rs.respSize
	}

	// ignore rs.err
	rs.respSize += int64(cap(rs.startKey))
	if rs.detail != nil {
		rs.respSize += int64(sizeofInterDircDetails)
	}
	if rs.pbResp != nil {
		// Use an approximate size since it's hard to get an accurate value.
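		// rs.pbResp.Size() is the protobuf-encoded length of the batch response,
		// used here as a cheap approximation of the memory the response holds on to.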
		rs.respSize += int64(rs.pbResp.Size())
	}
	return rs.respSize
}

func (rs *batchCopResponse) RespTime() time.Duration {
	return rs.respTime
}

type copTaskAndRPCContext struct {
	task *copTask
	ctx  *RPCContext
}

// buildBatchCausetTasks splits the key ranges into region copTasks and groups them into
// one batchCopTask per TiFlash causetstore, retrying when the region cache is stale.
func buildBatchCausetTasks(bo *Backoffer, cache *RegionCache, ranges *copRanges, req *ekv.Request) ([]*batchCopTask, error) {
	start := time.Now()
	const cmdType = einsteindbrpc.CmdBatchCop
	rangesLen := ranges.len()
	for {
		var tasks []*copTask
		appendTask := func(regionWithRangeInfo *KeyLocation, ranges *copRanges) {
			tasks = append(tasks, &copTask{
				region:    regionWithRangeInfo.Region,
				ranges:    ranges,
				cmdType:   cmdType,
				storeType: req.StoreType,
			})
		}

		err := splitRanges(bo, cache, ranges, appendTask)
		if err != nil {
			return nil, errors.Trace(err)
		}

		var batchTasks []*batchCopTask

		storeTaskMap := make(map[string]*batchCopTask)
		needRetry := false
		for _, task := range tasks {
			rpcCtx, err := cache.GetTiFlashRPCContext(bo, task.region)
			if err != nil {
				return nil, errors.Trace(err)
			}
			// If the region is not found in the cache, it must be out of date and
			// have already been cleaned up. We should retry and generate new tasks.
			if rpcCtx == nil {
				needRetry = true
				err = bo.Backoff(BoRegionMiss, errors.New("Cannot find region or TiFlash peer"))
				logutil.BgLogger().Info("retry for TiFlash peer or region missing", zap.Uint64("region id", task.region.GetID()))
				if err != nil {
					return nil, errors.Trace(err)
				}
				break
			}
			if batchCop, ok := storeTaskMap[rpcCtx.Addr]; ok {
				batchCop.CausetTasks = append(batchCop.CausetTasks, copTaskAndRPCContext{task: task, ctx: rpcCtx})
			} else {
				batchTask := &batchCopTask{
					storeAddr:   rpcCtx.Addr,
					cmdType:     cmdType,
					CausetTasks: []copTaskAndRPCContext{{task, rpcCtx}},
				}
				storeTaskMap[rpcCtx.Addr] = batchTask
			}
		}
		if needRetry {
			continue
		}
		for _, task := range storeTaskMap {
			batchTasks = append(batchTasks, task)
		}

		if elapsed := time.Since(start); elapsed > time.Millisecond*500 {
			logutil.BgLogger().Warn("buildBatchCausetTasks takes too much time",
				zap.Duration("elapsed", elapsed),
				zap.Int("range len", rangesLen),
				zap.Int("task len", len(batchTasks)))
		}
		einsteindbTxnRegionsNumHistogramWithBatchCoprocessor.Observe(float64(len(batchTasks)))
		return batchTasks, nil
	}
}

// sendBatch builds batch cop tasks for the request and returns an iterator that streams
// the results back from TiFlash.
func (c *CopClient) sendBatch(ctx context.Context, req *ekv.Request, vars *ekv.Variables) ekv.Response {
	if req.KeepOrder || req.Desc {
		return copErrorResponse{errors.New("batch interlock cannot prove keep order or desc property")}
	}
	ctx = context.WithValue(ctx, txnStartKey, req.StartTs)
	bo := NewBackofferWithVars(ctx, copBuildTaskMaxBackoff, vars)
	tasks, err := buildBatchCausetTasks(bo, c.causetstore.regionCache, &copRanges{mid: req.KeyRanges}, req)
	if err != nil {
		return copErrorResponse{err}
	}
	it := &batchCopIterator{
		causetstore: c.causetstore,
		req:         req,
		finishCh:    make(chan struct{}),
		vars:        vars,
		memTracker:  req.MemTracker,
		clientHelper: clientHelper{
			LockResolver:      c.causetstore.lockResolver,
			RegionCache:       c.causetstore.regionCache,
			Client:            c.causetstore.client,
			minCommitTSPushed: &minCommitTSPushed{data: make(map[uint64]struct{}, 5)},
		},
		rpcCancel: NewRPCanceller(),
	}
	ctx = context.WithValue(ctx, RPCCancellerCtxKey{}, it.rpcCancel)
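	// The canceller travels with the context so that the request sender (outside this
	// file) can register in-flight RPCs against it; Close, via rpcCancel.CancelAll,
	// can then interrupt them without a context.WithCancel per task.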
	it.tasks = tasks
	it.respChan = make(chan *batchCopResponse, 2048)
	go it.run(ctx)
	return it
}

type batchCopIterator struct {
	clientHelper

	causetstore *einsteindbStore
	req         *ekv.Request
	finishCh    chan struct{}

	tasks []*batchCopTask

	// Batch results are stored in respChan.
	respChan chan *batchCopResponse

	vars *ekv.Variables

	memTracker *memory.Tracker

	replicaReadSeed uint32

	rpcCancel *RPCCanceller

	wg sync.WaitGroup
	// closed represents whether Close has been called.
	// There are two cases in which we need to close the `finishCh` channel: one is when the context is done,
	// the other is when Close is called. We use atomic.CompareAndSwap on `closed` to make sure the channel
	// is not closed twice.
	closed uint32
}

func (b *batchCopIterator) run(ctx context.Context) {
	// We run a worker goroutine for every batch cop task.
	for _, task := range b.tasks {
		b.wg.Add(1)
		bo := NewBackofferWithVars(ctx, copNextMaxBackoff, b.vars)
		go b.handleTask(ctx, bo, task)
	}
	b.wg.Wait()
	close(b.respChan)
}

// Next returns the next interlock result.
// NOTE: Use nil to indicate finish, so if the returned ResultSubset is not nil, the reader should continue to call Next().
func (b *batchCopIterator) Next(ctx context.Context) (ekv.ResultSubset, error) {
	var (
		resp   *batchCopResponse
		ok     bool
		closed bool
	)

	// Get the next fetched resp from the channel.
	resp, ok, closed = b.recvFromRespCh(ctx)
	if !ok || closed {
		return nil, nil
	}

	if resp.err != nil {
		return nil, errors.Trace(resp.err)
	}

	err := b.causetstore.CheckVisibility(b.req.StartTs)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return resp, nil
}

func (b *batchCopIterator) recvFromRespCh(ctx context.Context) (resp *batchCopResponse, ok bool, exit bool) {
	ticker := time.NewTicker(3 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case resp, ok = <-b.respChan:
			return
		case <-ticker.C:
			if atomic.LoadUint32(b.vars.Killed) == 1 {
				resp = &batchCopResponse{err: ErrQueryInterrupted}
				ok = true
				return
			}
		case <-b.finishCh:
			exit = true
			return
		case <-ctx.Done():
			// We select ctx.Done() in the thread of `Next` instead of in the worker to avoid the cost of `WithCancel`.
			if atomic.CompareAndSwapUint32(&b.closed, 0, 1) {
				close(b.finishCh)
			}
			exit = true
			return
		}
	}
}

// Close releases the resources.
func (b *batchCopIterator) Close() error {
	if atomic.CompareAndSwapUint32(&b.closed, 0, 1) {
		close(b.finishCh)
	}
	b.rpcCancel.CancelAll()
	b.wg.Wait()
	return nil
}

func (b *batchCopIterator) handleTask(ctx context.Context, bo *Backoffer, task *batchCopTask) {
	logutil.BgLogger().Debug("handle batch task")
	tasks := []*batchCopTask{task}
	for idx := 0; idx < len(tasks); idx++ {
		ret, err := b.handleTaskOnce(ctx, bo, tasks[idx])
		if err != nil {
			resp := &batchCopResponse{err: errors.Trace(err), detail: new(CopRuntimeStats)}
			b.sendToRespCh(resp)
			break
		}
		tasks = append(tasks, ret...)
	}
	b.wg.Done()
}

// retryBatchCopTask merges all ranges of the failed task and requests again.
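// Batch cop responses do not carry a start key (see batchCopResponse), so a partial retry
// is not possible: every range of every sub-task is collected and re-split against the
// region cache by buildBatchCausetTasks, which may return several new batch tasks.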
func (b *batchCopIterator) retryBatchCopTask(ctx context.Context, bo *Backoffer, batchTask *batchCopTask) ([]*batchCopTask, error) {
	ranges := &copRanges{}
	for _, taskCtx := range batchTask.CausetTasks {
		taskCtx.task.ranges.do(func(ran *ekv.KeyRange) {
			ranges.mid = append(ranges.mid, *ran)
		})
	}
	return buildBatchCausetTasks(bo, b.RegionCache, ranges, b.req)
}

func (b *batchCopIterator) handleTaskOnce(ctx context.Context, bo *Backoffer, task *batchCopTask) ([]*batchCopTask, error) {
	logutil.BgLogger().Debug("handle batch task once")
	sender := NewRegionBatchRequestSender(b.causetstore.regionCache, b.causetstore.client)
	var regionInfos []*interlock.RegionInfo
	for _, task := range task.CausetTasks {
		regionInfos = append(regionInfos, &interlock.RegionInfo{
			RegionId: task.task.region.id,
			RegionEpoch: &spacetimepb.RegionEpoch{
				ConfVer: task.task.region.confVer,
				Version: task.task.region.ver,
			},
			Ranges: task.task.ranges.toPBRanges(),
		})
	}

	copReq := interlock.BatchRequest{
		Tp:        b.req.Tp,
		StartTs:   b.req.StartTs,
		Data:      b.req.Data,
		SchemaVer: b.req.SchemaVar,
		Regions:   regionInfos,
	}

	req := einsteindbrpc.NewRequest(task.cmdType, &copReq, ekvrpcpb.Context{
		IsolationLevel: pbIsolationLevel(b.req.IsolationLevel),
		Priority:       ekvPriorityToCommandPri(b.req.Priority),
		NotFillCache:   b.req.NotFillCache,
		HandleTime:     true,
		ScanDetail:     true,
		TaskId:         b.req.TaskID,
	})
	req.StoreTp = ekv.TiFlash

	logutil.BgLogger().Debug("send batch request to ", zap.String("req info", req.String()), zap.Int("cop task len", len(task.CausetTasks)))
	resp, retry, cancel, err := sender.sendStreamReqToAddr(bo, task.CausetTasks, req, ReadTimeoutUltraLong)
	// If there are causetstore errors, we should retry for all regions.
	if retry {
		return b.retryBatchCopTask(ctx, bo, task)
	}
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer cancel()
	return nil, b.handleStreamedBatchCopResponse(ctx, bo, resp.Resp.(*einsteindbrpc.BatchCopStreamResponse), task)
}

func (b *batchCopIterator) handleStreamedBatchCopResponse(ctx context.Context, bo *Backoffer, response *einsteindbrpc.BatchCopStreamResponse, task *batchCopTask) (err error) {
	defer response.Close()
	resp := response.BatchResponse
	if resp == nil {
		// The streaming request returned io.EOF, so the first Response is nil.
		return
	}
	for {
		err = b.handleBatchCopResponse(bo, resp, task)
		if err != nil {
			return errors.Trace(err)
		}
		resp, err = response.Recv()
		if err != nil {
			if errors.Cause(err) == io.EOF {
				return nil
			}

			if err1 := bo.Backoff(boEinsteinDBRPC, errors.Errorf("recv stream response error: %v, task causetstore addr: %s", err, task.storeAddr)); err1 != nil {
				return errors.Trace(err)
			}

			// There is no interlock.Response for a network error.
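			// A batch cop task has no start key to resume from, so unlike the per-region
			// cop stream the task is not rebuilt here; the error is logged, then returned,
			// and handleTask turns it into an error response for the reader.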
			if errors.Cause(err) == context.Canceled {
				logutil.BgLogger().Info("stream recv timeout", zap.Error(err))
			} else {
				logutil.BgLogger().Info("stream unknown error", zap.Error(err))
			}
			return errors.Trace(err)
		}
	}
}

func (b *batchCopIterator) handleBatchCopResponse(bo *Backoffer, response *interlock.BatchResponse, task *batchCopTask) (err error) {
	if otherErr := response.GetOtherError(); otherErr != "" {
		err = errors.Errorf("other error: %s", otherErr)
		logutil.BgLogger().Warn("other error",
			zap.Uint64("txnStartTS", b.req.StartTs),
			zap.String("storeAddr", task.storeAddr),
			zap.Error(err))
		return errors.Trace(err)
	}

	resp := batchCopResponse{
		pbResp: response,
		detail: new(CopRuntimeStats),
	}

	resp.detail.BackoffTime = time.Duration(bo.totalSleep) * time.Millisecond
	resp.detail.BackoffSleep = make(map[string]time.Duration, len(bo.backoffTimes))
	resp.detail.BackoffTimes = make(map[string]int, len(bo.backoffTimes))
	for backoff := range bo.backoffTimes {
		backoffName := backoff.String()
		resp.detail.BackoffTimes[backoffName] = bo.backoffTimes[backoff]
		resp.detail.BackoffSleep[backoffName] = time.Duration(bo.backoffSleepMS[backoff]) * time.Millisecond
	}
	resp.detail.CalleeAddress = task.storeAddr

	b.sendToRespCh(&resp)

	return
}

func (b *batchCopIterator) sendToRespCh(resp *batchCopResponse) (exit bool) {
	select {
	case b.respChan <- resp:
	case <-b.finishCh:
		exit = true
	}
	return
}
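
// The helper below is an illustrative sketch of how the ekv.Response returned by
// CopClient.sendBatch is meant to be consumed; drainBatchCopResults is a hypothetical
// name, not a helper used elsewhere in this package. Keep calling Next until it yields
// nil, and always Close the response so the worker goroutines and any in-flight RPCs
// are released.
func drainBatchCopResults(ctx context.Context, resp ekv.Response) error {
	defer resp.Close()
	for {
		subset, err := resp.Next(ctx)
		if err != nil {
			return errors.Trace(err)
		}
		if subset == nil {
			// nil marks the end of the stream (see batchCopIterator.Next).
			return nil
		}
		// The raw interlock data would normally be handed to a decoder here.
		_ = subset.GetData()
	}
}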