github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/einsteindb/batch_coprocessor.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package einsteindb

import (
	"context"
	"io"
	"sync"
	"sync/atomic"
	"time"

	"github.com/whtcorpsinc/ekvproto/pkg/ekvrpcpb"
	"github.com/whtcorpsinc/ekvproto/pkg/interlock"
	"github.com/whtcorpsinc/ekvproto/pkg/spacetimepb"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/einsteindbrpc"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"go.uber.org/zap"
)

// batchCopTask comprises multiple copTasks that will be sent to the same causetstore.
type batchCopTask struct {
	storeAddr string
	cmdType   einsteindbrpc.CmdType

	CausetTasks []copTaskAndRPCContext
}

type batchCopResponse struct {
	pbResp *interlock.BatchResponse
	detail *CopRuntimeStats

	// A batch Causet response does not return startKey yet, so batchCop cannot retry partially.
	startKey ekv.Key
	err      error
	respSize int64
	respTime time.Duration
}

// GetData implements the ekv.ResultSubset GetData interface.
func (rs *batchCopResponse) GetData() []byte {
	return rs.pbResp.Data
}

// GetStartKey implements the ekv.ResultSubset GetStartKey interface.
func (rs *batchCopResponse) GetStartKey() ekv.Key {
	return rs.startKey
}

// GetCopRuntimeStats returns the runtime stats. InterDirc (exec) details are unavailable
// currently, because TiFlash has not collected them for batch cop.
// TODO: Will fix in near future.
func (rs *batchCopResponse) GetCopRuntimeStats() *CopRuntimeStats {
	return rs.detail
}

// MemSize returns how many bytes of memory this response uses.
func (rs *batchCopResponse) MemSize() int64 {
	if rs.respSize != 0 {
		return rs.respSize
	}

	// ignore rs.err
	rs.respSize += int64(cap(rs.startKey))
	if rs.detail != nil {
		rs.respSize += int64(sizeofInterDircDetails)
	}
	if rs.pbResp != nil {
		// Using an approximate size since it's hard to get an accurate value.
		rs.respSize += int64(rs.pbResp.Size())
	}
	return rs.respSize
}
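
// Illustrative sketch only (not part of the original logic): MemSize caches its
// result in respSize, so repeated calls are cheap. A consumer that tracks memory
// would typically charge the response against a memory.Tracker before handing it
// upward, roughly as below, assuming the soliton/memory Tracker keeps the usual
// Consume(int64) method. Nothing in this file makes this call, and `it`/`resp`
// are hypothetical names for a batchCopIterator and a *batchCopResponse:
//
//	if it.memTracker != nil {
//		it.memTracker.Consume(resp.MemSize())
//	}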

func (rs *batchCopResponse) RespTime() time.Duration {
	return rs.respTime
}

type copTaskAndRPCContext struct {
	task *copTask
	ctx  *RPCContext
}

func buildBatchCausetTasks(bo *Backoffer, cache *RegionCache, ranges *copRanges, req *ekv.Request) ([]*batchCopTask, error) {
	start := time.Now()
	const cmdType = einsteindbrpc.CmdBatchCop
	rangesLen := ranges.len()
	for {
		var tasks []*copTask
		appendTask := func(regionWithRangeInfo *KeyLocation, ranges *copRanges) {
			tasks = append(tasks, &copTask{
				region:    regionWithRangeInfo.Region,
				ranges:    ranges,
				cmdType:   cmdType,
				storeType: req.StoreType,
			})
		}

		err := splitRanges(bo, cache, ranges, appendTask)
		if err != nil {
			return nil, errors.Trace(err)
		}

		var batchTasks []*batchCopTask

		storeTaskMap := make(map[string]*batchCopTask)
		needRetry := false
		for _, task := range tasks {
			rpcCtx, err := cache.GetTiFlashRPCContext(bo, task.region)
			if err != nil {
				return nil, errors.Trace(err)
			}
			// If the region is not found in the cache, it must be out
			// of date and already cleaned up. We should retry and generate new tasks.
			if rpcCtx == nil {
				needRetry = true
				err = bo.Backoff(BoRegionMiss, errors.New("Cannot find region or TiFlash peer"))
				logutil.BgLogger().Info("retry for TiFlash peer or region missing", zap.Uint64("region id", task.region.GetID()))
				if err != nil {
					return nil, errors.Trace(err)
				}
				break
			}
			if batchCop, ok := storeTaskMap[rpcCtx.Addr]; ok {
				batchCop.CausetTasks = append(batchCop.CausetTasks, copTaskAndRPCContext{task: task, ctx: rpcCtx})
			} else {
				batchTask := &batchCopTask{
					storeAddr:   rpcCtx.Addr,
					cmdType:     cmdType,
					CausetTasks: []copTaskAndRPCContext{{task, rpcCtx}},
				}
				storeTaskMap[rpcCtx.Addr] = batchTask
			}
		}
		if needRetry {
			continue
		}
		for _, task := range storeTaskMap {
			batchTasks = append(batchTasks, task)
		}

		if elapsed := time.Since(start); elapsed > time.Millisecond*500 {
			logutil.BgLogger().Warn("buildBatchCausetTasks takes too much time",
				zap.Duration("elapsed", elapsed),
				zap.Int("range len", rangesLen),
				zap.Int("task len", len(batchTasks)))
		}
		einsteindbTxnRegionsNumHistogramWithBatchCoprocessor.Observe(float64(len(batchTasks)))
		return batchTasks, nil
	}
}

func (c *CopClient) sendBatch(ctx context.Context, req *ekv.Request, vars *ekv.Variables) ekv.Response {
	if req.KeepOrder || req.Desc {
		return copErrorResponse{errors.New("batch interlock cannot promise keep order or desc property")}
	}
	ctx = context.WithValue(ctx, txnStartKey, req.StartTs)
	bo := NewBackofferWithVars(ctx, copBuildTaskMaxBackoff, vars)
	tasks, err := buildBatchCausetTasks(bo, c.causetstore.regionCache, &copRanges{mid: req.KeyRanges}, req)
	if err != nil {
		return copErrorResponse{err}
	}
	it := &batchCopIterator{
		causetstore: c.causetstore,
		req:         req,
		finishCh:    make(chan struct{}),
		vars:        vars,
		memTracker:  req.MemTracker,
		clientHelper: clientHelper{
			LockResolver:      c.causetstore.lockResolver,
			RegionCache:       c.causetstore.regionCache,
			Client:            c.causetstore.client,
			minCommitTSPushed: &minCommitTSPushed{data: make(map[uint64]struct{}, 5)},
		},
		rpcCancel: NewRPCanceller(),
	}
	ctx = context.WithValue(ctx, RPCCancellerCtxKey{}, it.rpcCancel)
	it.tasks = tasks
	it.respChan = make(chan *batchCopResponse, 2048)
	go it.run(ctx)
	return it
}
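
// To make the grouping done by buildBatchCausetTasks concrete (the task, region
// and causetstore names below are hypothetical, for illustration only): if
// splitRanges produces copTasks t1, t2 and t3 for regions r1, r2 and r3, and the
// TiFlash peers of r1 and r2 live on "tiflash-0" while r3 lives on "tiflash-1",
// the function returns two batchCopTasks along the lines of
//
//	[]*batchCopTask{
//		{storeAddr: "tiflash-0", cmdType: cmdType, CausetTasks: []copTaskAndRPCContext{{t1, ctx1}, {t2, ctx2}}},
//		{storeAddr: "tiflash-1", cmdType: cmdType, CausetTasks: []copTaskAndRPCContext{{t3, ctx3}}},
//	}
//
// sendBatch then starts one worker goroutine per batchCopTask (see run below)
// and multiplexes their results onto respChan.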

type batchCopIterator struct {
	clientHelper

	causetstore *einsteindbStore
	req         *ekv.Request
	finishCh    chan struct{}

	tasks []*batchCopTask

	// Batch results are stored in respChan.
	respChan chan *batchCopResponse

	vars *ekv.Variables

	memTracker *memory.Tracker

	replicaReadSeed uint32

	rpcCancel *RPCCanceller

	wg sync.WaitGroup
	// closed represents whether Close has been called.
	// There are two cases in which we need to close the `finishCh` channel: one is when the context is done,
	// the other is when Close is called. We use atomic.CompareAndSwap on `closed` to make sure the channel is
	// not closed twice.
	closed uint32
}

func (b *batchCopIterator) run(ctx context.Context) {
	// We run one worker goroutine for every batch cop task.
	for _, task := range b.tasks {
		b.wg.Add(1)
		bo := NewBackofferWithVars(ctx, copNextMaxBackoff, b.vars)
		go b.handleTask(ctx, bo, task)
	}
	b.wg.Wait()
	close(b.respChan)
}

// Next returns the next interlock result.
// NOTE: Use nil to indicate finish, so if the returned ResultSubset is not nil, the reader should continue to call Next().
func (b *batchCopIterator) Next(ctx context.Context) (ekv.ResultSubset, error) {
	var (
		resp   *batchCopResponse
		ok     bool
		closed bool
	)

	// Get the next fetched resp from the chan.
	resp, ok, closed = b.recvFromRespCh(ctx)
	if !ok || closed {
		return nil, nil
	}

	if resp.err != nil {
		return nil, errors.Trace(resp.err)
	}

	err := b.causetstore.CheckVisibility(b.req.StartTs)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return resp, nil
}

func (b *batchCopIterator) recvFromRespCh(ctx context.Context) (resp *batchCopResponse, ok bool, exit bool) {
	ticker := time.NewTicker(3 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case resp, ok = <-b.respChan:
			return
		case <-ticker.C:
			if atomic.LoadUint32(b.vars.Killed) == 1 {
				resp = &batchCopResponse{err: ErrQueryInterrupted}
				ok = true
				return
			}
		case <-b.finishCh:
			exit = true
			return
		case <-ctx.Done():
			// We select the ctx.Done() in the thread of `Next` instead of in the worker to avoid the cost of `WithCancel`.
			if atomic.CompareAndSwapUint32(&b.closed, 0, 1) {
				close(b.finishCh)
			}
			exit = true
			return
		}
	}
}

// Close releases the resource.
func (b *batchCopIterator) Close() error {
	if atomic.CompareAndSwapUint32(&b.closed, 0, 1) {
		close(b.finishCh)
	}
	b.rpcCancel.CancelAll()
	b.wg.Wait()
	return nil
}

func (b *batchCopIterator) handleTask(ctx context.Context, bo *Backoffer, task *batchCopTask) {
	logutil.BgLogger().Debug("handle batch task")
	tasks := []*batchCopTask{task}
	for idx := 0; idx < len(tasks); idx++ {
		ret, err := b.handleTaskOnce(ctx, bo, tasks[idx])
		if err != nil {
			resp := &batchCopResponse{err: errors.Trace(err), detail: new(CopRuntimeStats)}
			b.sendToRespCh(resp)
			break
		}
		tasks = append(tasks, ret...)
	}
	b.wg.Done()
}
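
// Note on the retry path: when handleTaskOnce hits a retriable causetstore
// error it returns a freshly built task list (see retryBatchCopTask below),
// and handleTask appends that list to its local slice so the same worker
// drains the rebuilt tasks. Because a BatchResponse carries no startKey (see
// batchCopResponse above), a retry always re-sends every range of the failed
// batchCopTask rather than resuming partway through.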

// Merge all ranges and request again.
func (b *batchCopIterator) retryBatchCopTask(ctx context.Context, bo *Backoffer, batchTask *batchCopTask) ([]*batchCopTask, error) {
	ranges := &copRanges{}
	for _, taskCtx := range batchTask.CausetTasks {
		taskCtx.task.ranges.do(func(ran *ekv.KeyRange) {
			ranges.mid = append(ranges.mid, *ran)
		})
	}
	return buildBatchCausetTasks(bo, b.RegionCache, ranges, b.req)
}

func (b *batchCopIterator) handleTaskOnce(ctx context.Context, bo *Backoffer, task *batchCopTask) ([]*batchCopTask, error) {
	logutil.BgLogger().Debug("handle batch task once")
	sender := NewRegionBatchRequestSender(b.causetstore.regionCache, b.causetstore.client)
	var regionInfos []*interlock.RegionInfo
	for _, task := range task.CausetTasks {
		regionInfos = append(regionInfos, &interlock.RegionInfo{
			RegionId: task.task.region.id,
			RegionEpoch: &spacetimepb.RegionEpoch{
				ConfVer: task.task.region.confVer,
				Version: task.task.region.ver,
			},
			Ranges: task.task.ranges.toPBRanges(),
		})
	}

	copReq := interlock.BatchRequest{
		Tp:        b.req.Tp,
		StartTs:   b.req.StartTs,
		Data:      b.req.Data,
		SchemaVer: b.req.SchemaVar,
		Regions:   regionInfos,
	}

	req := einsteindbrpc.NewRequest(task.cmdType, &copReq, ekvrpcpb.Context{
		IsolationLevel: pbIsolationLevel(b.req.IsolationLevel),
		Priority:       ekvPriorityToCommandPri(b.req.Priority),
		NotFillCache:   b.req.NotFillCache,
		HandleTime:     true,
		ScanDetail:     true,
		TaskId:         b.req.TaskID,
	})
	req.StoreTp = ekv.TiFlash

	logutil.BgLogger().Debug("send batch request to ", zap.String("req info", req.String()), zap.Int("cop task len", len(task.CausetTasks)))
	resp, retry, cancel, err := sender.sendStreamReqToAddr(bo, task.CausetTasks, req, ReadTimeoutUltraLong)
	// If there are causetstore errors, we should retry for all regions.
	if retry {
		return b.retryBatchCopTask(ctx, bo, task)
	}
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer cancel()
	return nil, b.handleStreamedBatchCopResponse(ctx, bo, resp.Resp.(*einsteindbrpc.BatchCopStreamResponse), task)
}
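
// For reference, each entry assembled into copReq.Regions above carries just
// the region identity plus its key ranges, so a single RegionInfo looks
// roughly like the following (the numeric values are hypothetical):
//
//	&interlock.RegionInfo{
//		RegionId:    2,
//		RegionEpoch: &spacetimepb.RegionEpoch{ConfVer: 5, Version: 7},
//		Ranges:      task.task.ranges.toPBRanges(),
//	}
//
// The epoch is what lets the remote causetstore recognize a request that was
// built from a stale region cache.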

func (b *batchCopIterator) handleStreamedBatchCopResponse(ctx context.Context, bo *Backoffer, response *einsteindbrpc.BatchCopStreamResponse, task *batchCopTask) (err error) {
	defer response.Close()
	resp := response.BatchResponse
	if resp == nil {
		// The streaming request returned io.EOF, so the first Response is nil.
		return
	}
	for {
		err = b.handleBatchCopResponse(bo, resp, task)
		if err != nil {
			return errors.Trace(err)
		}
		resp, err = response.Recv()
		if err != nil {
			if errors.Cause(err) == io.EOF {
				return nil
			}

			if err1 := bo.Backoff(boEinsteinDBRPC, errors.Errorf("recv stream response error: %v, task causetstore addr: %s", err, task.storeAddr)); err1 != nil {
				return errors.Trace(err)
			}

			// There is no interlock.Response for a network error; rebuild the task based on the last successful one.
			if errors.Cause(err) == context.Canceled {
				logutil.BgLogger().Info("stream recv timeout", zap.Error(err))
			} else {
				logutil.BgLogger().Info("stream unknown error", zap.Error(err))
			}
			return errors.Trace(err)
		}
	}
}

func (b *batchCopIterator) handleBatchCopResponse(bo *Backoffer, response *interlock.BatchResponse, task *batchCopTask) (err error) {
	if otherErr := response.GetOtherError(); otherErr != "" {
		err = errors.Errorf("other error: %s", otherErr)
		logutil.BgLogger().Warn("other error",
			zap.Uint64("txnStartTS", b.req.StartTs),
			zap.String("storeAddr", task.storeAddr),
			zap.Error(err))
		return errors.Trace(err)
	}

	resp := batchCopResponse{
		pbResp: response,
		detail: new(CopRuntimeStats),
	}

	resp.detail.BackoffTime = time.Duration(bo.totalSleep) * time.Millisecond
	resp.detail.BackoffSleep = make(map[string]time.Duration, len(bo.backoffTimes))
	resp.detail.BackoffTimes = make(map[string]int, len(bo.backoffTimes))
	for backoff := range bo.backoffTimes {
		backoffName := backoff.String()
		resp.detail.BackoffTimes[backoffName] = bo.backoffTimes[backoff]
		resp.detail.BackoffSleep[backoffName] = time.Duration(bo.backoffSleepMS[backoff]) * time.Millisecond
	}
	resp.detail.CalleeAddress = task.storeAddr

	b.sendToRespCh(&resp)

	return
}

func (b *batchCopIterator) sendToRespCh(resp *batchCopResponse) (exit bool) {
	select {
	case b.respChan <- resp:
	case <-b.finishCh:
		exit = true
	}
	return
}
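
// An illustrative sketch of how the iterator returned by sendBatch is meant to
// be consumed, assuming ekv.Response exposes the usual Next/Close pair that
// batchCopIterator implements above; `c`, `req`, `vars` and `consume` are
// placeholder names, not identifiers defined in this file:
//
//	resp := c.sendBatch(ctx, req, vars)
//	defer resp.Close()
//	for {
//		subset, err := resp.Next(ctx)
//		if err != nil {
//			return err
//		}
//		if subset == nil { // nil signals that all batch workers have finished
//			break
//		}
//		consume(subset.GetData())
//	}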