github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/einsteindb/client.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package einsteindb provides TCP connections to ekvserver.
package einsteindb

import (
	"context"
	"io"
	"math"
	"runtime/trace"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing"
	"github.com/opentracing/opentracing-go"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/ekvproto/pkg/debugpb"
	"github.com/whtcorpsinc/ekvproto/pkg/einsteindbpb"
	"github.com/whtcorpsinc/ekvproto/pkg/interlock"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/einsteindbrpc"
	"github.com/whtcorpsinc/milevadb/config"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/keepalive"
)

// MaxRecvMsgSize sets the maximum gRPC message size the client can receive
// from the server. If any message is larger than this value, gRPC reports an error.
var MaxRecvMsgSize = math.MaxInt64

// Timeout durations.
var (
	dialTimeout               = 5 * time.Second
	readTimeoutShort          = 20 * time.Second   // For requests that read/write several key-values.
	ReadTimeoutMedium         = 60 * time.Second   // For requests that may need to scan a region.
	ReadTimeoutLong           = 150 * time.Second  // For requests that may need to scan a region multiple times.
	ReadTimeoutUltraLong      = 3600 * time.Second // For requests that may scan many regions, e.g. TiFlash requests.
	GCTimeout                 = 5 * time.Minute
	UnsafeDestroyRangeTimeout = 5 * time.Minute
	AccessLockObserverTimeout = 10 * time.Second
)

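// Note (added): 1 GiB initial stream and connection windows are large enough
// that HTTP/2 flow control effectively never throttles big interlock
// (coprocessor) responses.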
const (
	grpcInitialWindowSize     = 1 << 30
	grpcInitialConnWindowSize = 1 << 30
)

// Client is a client that sends RPCs.
// It should not be used after calling Close().
type Client interface {
	// Close should release all resources held by the client.
	Close() error
	// SendRequest sends a Request to the server at addr.
	SendRequest(ctx context.Context, addr string, req *einsteindbrpc.Request, timeout time.Duration) (*einsteindbrpc.Response, error)
}
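
// sendPointGet is an illustrative sketch, not part of the original API: it
// shows how a caller is expected to pair the Client interface with the
// timeout classes declared above. How the *einsteindbrpc.Request is built
// and routed is assumed to happen in higher layers (e.g. the region cache).
func sendPointGet(ctx context.Context, c Client, addr string, req *einsteindbrpc.Request) (*einsteindbrpc.Response, error) {
	// A point get touches only a few key-values, so the short read timeout
	// class fits; region scans would use ReadTimeoutMedium or ReadTimeoutLong.
	resp, err := c.SendRequest(ctx, addr, req, readTimeoutShort)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return resp, nil
}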

type connArray struct {
	// The target host.
	target string

	index uint32
	v     []*grpc.ClientConn
	// streamTimeout is bound to a background goroutine that processes interlock streaming timeouts.
	streamTimeout chan *einsteindbrpc.Lease
	dialTimeout   time.Duration
	// batchConn is not nil when batching is enabled.
	*batchConn
	done chan struct{}
}

func newConnArray(maxSize uint, addr string, security config.Security, idleNotify *uint32, enableBatch bool, dialTimeout time.Duration) (*connArray, error) {
	a := &connArray{
		index:         0,
		v:             make([]*grpc.ClientConn, maxSize),
		streamTimeout: make(chan *einsteindbrpc.Lease, 1024),
		done:          make(chan struct{}),
		dialTimeout:   dialTimeout,
	}
	if err := a.Init(addr, security, idleNotify, enableBatch); err != nil {
		return nil, err
	}
	return a, nil
}

func (a *connArray) Init(addr string, security config.Security, idleNotify *uint32, enableBatch bool) error {
	a.target = addr

	opt := grpc.WithInsecure()
	if len(security.ClusterSSLCA) != 0 {
		tlsConfig, err := security.ToTLSConfig()
		if err != nil {
			return errors.Trace(err)
		}
		opt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
	}

	cfg := config.GetGlobalConfig()
	var (
		unaryInterceptor  grpc.UnaryClientInterceptor
		streamInterceptor grpc.StreamClientInterceptor
	)
	if cfg.OpenTracing.Enable {
		unaryInterceptor = grpc_opentracing.UnaryClientInterceptor()
		streamInterceptor = grpc_opentracing.StreamClientInterceptor()
	}

	allowBatch := (cfg.EinsteinDBClient.MaxBatchSize > 0) && enableBatch
	if allowBatch {
		a.batchConn = newBatchConn(uint(len(a.v)), cfg.EinsteinDBClient.MaxBatchSize, idleNotify)
		a.pendingRequests = metrics.EinsteinDBPendingBatchRequests.WithLabelValues(a.target)
	}
	keepAlive := cfg.EinsteinDBClient.GrpcKeepAliveTime
	keepAliveTimeout := cfg.EinsteinDBClient.GrpcKeepAliveTimeout
	for i := range a.v {
		ctx, cancel := context.WithTimeout(context.Background(), a.dialTimeout)
		conn, err := grpc.DialContext(
			ctx,
			addr,
			opt,
			grpc.WithInitialWindowSize(grpcInitialWindowSize),
			grpc.WithInitialConnWindowSize(grpcInitialConnWindowSize),
			grpc.WithUnaryInterceptor(unaryInterceptor),
			grpc.WithStreamInterceptor(streamInterceptor),
			grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(MaxRecvMsgSize)),
			grpc.WithConnectParams(grpc.ConnectParams{
				Backoff: backoff.Config{
					BaseDelay:  100 * time.Millisecond, // Default was 1s.
					Multiplier: 1.6,                    // Default
					Jitter:     0.2,                    // Default
					MaxDelay:   3 * time.Second,        // Default was 120s.
				},
				MinConnectTimeout: a.dialTimeout,
			}),
			grpc.WithKeepaliveParams(keepalive.ClientParameters{
				Time:                time.Duration(keepAlive) * time.Second,
				Timeout:             time.Duration(keepAliveTimeout) * time.Second,
				PermitWithoutStream: true,
			}),
		)
		cancel()
		if err != nil {
			// Clean up if initialization fails.
			a.Close()
			return errors.Trace(err)
		}
		a.v[i] = conn

		if allowBatch {
			batchClient := &batchCommandsClient{
				target:              a.target,
				conn:                conn,
				batched:             sync.Map{},
				idAlloc:             0,
				closed:              0,
				einsteindbClientCfg: cfg.EinsteinDBClient,
				einsteindbLoad:      &a.einsteindbTransportLayerLoad,
				dialTimeout:         a.dialTimeout,
			}
			a.batchCommandsClients = append(a.batchCommandsClients, batchClient)
		}
	}
	go einsteindbrpc.CheckStreamTimeoutLoop(a.streamTimeout, a.done)
	if allowBatch {
		go a.batchSendLoop(cfg.EinsteinDBClient)
	}

	return nil
}

func (a *connArray) Get() *grpc.ClientConn {
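	// Round-robin over the pool; wraparound of the uint32 counter is
	// harmless because only the value modulo len(a.v) is used.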
	next := atomic.AddUint32(&a.index, 1) % uint32(len(a.v))
	return a.v[next]
}

func (a *connArray) Close() {
	if a.batchConn != nil {
		a.batchConn.Close()
	}

	for i, c := range a.v {
		if c != nil {
			err := c.Close()
			terror.Log(errors.Trace(err))
			a.v[i] = nil
		}
	}

	close(a.done)
}

// rpcClient is the RPC client struct.
// TODO: Add flow control between RPC clients in MilevaDB and RPC servers in EinsteinDB.
// Since we use a shared client connection to communicate with the same EinsteinDB server,
// too many concurrent requests may overload the EinsteinDB service.
type rpcClient struct {
	sync.RWMutex

	conns    map[string]*connArray
	security config.Security

	// idleNotify is periodically set by background goroutines that detect idle
	// connections; SendRequest consumes it and then closes and removes those
	// connections.
	idleNotify uint32

	isClosed    bool
	dialTimeout time.Duration
}
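
// The idleNotify field above is a one-bit mailbox: a watcher raises it when
// idle connections are detected, and exactly one request goroutine wins the
// 1-to-0 swap in SendRequest and recycles the idle connArrays. notifyIdle is
// an illustrative sketch of the producer side only; in this codebase the
// watcher lives in the batch connection logic, which receives &c.idleNotify.
func notifyIdle(flag *uint32) {
	// CompareAndSwap keeps the notification idempotent: the flag is raised
	// at most once until a consumer lowers it again.
	atomic.CompareAndSwapUint32(flag, 0, 1)
}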

func newRPCClient(security config.Security, opts ...func(c *rpcClient)) *rpcClient {
	cli := &rpcClient{
		conns:       make(map[string]*connArray),
		security:    security,
		dialTimeout: dialTimeout,
	}
	for _, opt := range opts {
		opt(cli)
	}
	return cli
}
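
// newRPCClient's variadic options let callers and tests tweak the client
// without widening the constructor. withShortDialTimeout is an illustrative
// sketch (no such option is defined elsewhere in this file):
//
//	cli := newRPCClient(security, withShortDialTimeout(time.Second))
func withShortDialTimeout(d time.Duration) func(c *rpcClient) {
	return func(c *rpcClient) {
		// Options run after the defaults are assigned, so this value wins.
		c.dialTimeout = d
	}
}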

// NewTestRPCClient is for some external tests.
func NewTestRPCClient(security config.Security) Client {
	return newRPCClient(security)
}

func (c *rpcClient) getConnArray(addr string, enableBatch bool, opt ...func(cfg *config.EinsteinDBClient)) (*connArray, error) {
	c.RLock()
	if c.isClosed {
		c.RUnlock()
		return nil, errors.Errorf("rpcClient is closed")
	}
	array, ok := c.conns[addr]
	c.RUnlock()
	if !ok {
		var err error
		array, err = c.createConnArray(addr, enableBatch, opt...)
		if err != nil {
			return nil, err
		}
	}
	return array, nil
}

func (c *rpcClient) createConnArray(addr string, enableBatch bool, opts ...func(cfg *config.EinsteinDBClient)) (*connArray, error) {
	c.Lock()
	defer c.Unlock()
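	// Double-checked locking: the read-locked lookup in getConnArray may have
	// raced with another goroutine creating the same connArray, so check
	// again under the write lock before dialing.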
	array, ok := c.conns[addr]
	if !ok {
		var err error
		client := config.GetGlobalConfig().EinsteinDBClient
		for _, opt := range opts {
			opt(&client)
		}
		array, err = newConnArray(client.GrpcConnectionCount, addr, c.security, &c.idleNotify, enableBatch, c.dialTimeout)
		if err != nil {
			return nil, err
		}
		c.conns[addr] = array
	}
	return array, nil
}

func (c *rpcClient) closeConns() {
	c.Lock()
	if !c.isClosed {
		c.isClosed = true
		// close all connections
		for _, array := range c.conns {
			array.Close()
		}
	}
	c.Unlock()
}

var sendReqHistCache sync.Map

type sendReqHistCacheKey struct {
	tp einsteindbrpc.CmdType
	id uint64
}

func (c *rpcClient) updateEinsteinDBSendReqHistogram(req *einsteindbrpc.Request, start time.Time) {
	key := sendReqHistCacheKey{
		req.Type,
		req.Context.GetPeer().GetStoreId(),
	}

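	// Load-then-Store is intentionally tolerant of races: two goroutines may
	// both miss and both create an observer, but WithLabelValues returns the
	// same child for identical labels, so the redundant Store is harmless.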
	v, ok := sendReqHistCache.Load(key)
	if !ok {
		reqType := req.Type.String()
		storeID := strconv.FormatUint(req.Context.GetPeer().GetStoreId(), 10)
		v = metrics.EinsteinDBSendReqHistogram.WithLabelValues(reqType, storeID)
		sendReqHistCache.Store(key, v)
	}

	v.(prometheus.Observer).Observe(time.Since(start).Seconds())
}

// SendRequest sends a Request to the server and receives a Response.
func (c *rpcClient) SendRequest(ctx context.Context, addr string, req *einsteindbrpc.Request, timeout time.Duration) (*einsteindbrpc.Response, error) {
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("rpcClient.SendRequest", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}

	start := time.Now()
	defer func() {
		stmtInterDirc := ctx.Value(execdetails.StmtInterDircDetailKey)
		if stmtInterDirc != nil {
			detail := stmtInterDirc.(*execdetails.StmtInterDircDetails)
			atomic.AddInt64(&detail.WaitKVResFIDeluration, int64(time.Since(start)))
		}
		c.updateEinsteinDBSendReqHistogram(req, start)
	}()

	if atomic.CompareAndSwapUint32(&c.idleNotify, 1, 0) {
		c.recycleIdleConnArray()
	}

	// MilevaDB does not send batch commands to TiFlash, to avoid conflicts with the Batch Causet Request.
	enableBatch := req.StoreTp != ekv.MilevaDB && req.StoreTp != ekv.TiFlash
	connArray, err := c.getConnArray(addr, enableBatch)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// The MilevaDB RPC server supports batch RPC, but a batch connection sends heartbeats,
	// which is unnecessary because requests to MilevaDB are not high-frequency.
	if config.GetGlobalConfig().EinsteinDBClient.MaxBatchSize > 0 && enableBatch {
		if batchReq := req.ToBatchCommandsRequest(); batchReq != nil {
			defer trace.StartRegion(ctx, req.Type.String()).End()
			return sendBatchRequest(ctx, addr, connArray.batchConn, batchReq, timeout)
		}
	}

	clientConn := connArray.Get()
	if state := clientConn.GetState(); state == connectivity.TransientFailure {
		storeID := strconv.FormatUint(req.Context.GetPeer().GetStoreId(), 10)
		metrics.GRPCConnTransientFailureCounter.WithLabelValues(addr, storeID).Inc()
	}

	if req.IsDebugReq() {
		client := debugpb.NewDebugClient(clientConn)
		ctx1, cancel := context.WithTimeout(ctx, timeout)
		defer cancel()
		return einsteindbrpc.CallDebugRPC(ctx1, client, req)
	}

	client := einsteindbpb.NewEinsteinDBClient(clientConn)

	if req.Type == einsteindbrpc.CmdBatchCop {
		return c.getBatchCopStreamResponse(ctx, client, req, timeout, connArray)
	}

	if req.Type == einsteindbrpc.CmdCopStream {
		return c.getCopStreamResponse(ctx, client, req, timeout, connArray)
	}
	ctx1, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	return einsteindbrpc.CallRPC(ctx1, client, req)
}

func (c *rpcClient) getCopStreamResponse(ctx context.Context, client einsteindbpb.EinsteinDBClient, req *einsteindbrpc.Request, timeout time.Duration, connArray *connArray) (*einsteindbrpc.Response, error) {
	// Interlock (coprocessor) streaming request.
	// Use the context to support timeout for the gRPC streaming client.
	ctx1, cancel := context.WithCancel(ctx)
	// Do NOT `defer cancel()` here, because that would cancel further stream.Recv() calls.
	// Instead, cancel is stored in copStream.Lease.Cancel and invoked by copStream.Close.
	// TODO: add unit test for SendRequest.
	resp, err := einsteindbrpc.CallRPC(ctx1, client, req)
	if err != nil {
		cancel()
		return nil, errors.Trace(err)
	}

	// Put the lease object on the timeout channel so it is checked periodically.
	copStream := resp.Resp.(*einsteindbrpc.CopStreamResponse)
	copStream.Timeout = timeout
	copStream.Lease.Cancel = cancel
	connArray.streamTimeout <- &copStream.Lease

	// Read the first streaming response to fill the CopStreamResponse.
	// This makes error handling much easier, because SendReq() retries on
	// region errors automatically.
	var first *interlock.Response
	first, err = copStream.Recv()
	if err != nil {
		if errors.Cause(err) != io.EOF {
			return nil, errors.Trace(err)
		}
		logutil.BgLogger().Debug("copstream returns nothing for the request.")
	}
	copStream.Response = first
	return resp, nil
}
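
// The lease sent to connArray.streamTimeout above is consumed by
// einsteindbrpc.CheckStreamTimeoutLoop, started in connArray.Init. The sketch
// below only illustrates the general shape of such a loop with a local
// stand-in type; the real einsteindbrpc.Lease and its deadline bookkeeping
// may differ.
type streamLease struct {
	deadline time.Time          // when the stream should be considered stuck
	cancel   context.CancelFunc // cancels the stream's context on expiry
}

func checkLeaseLoop(ch <-chan *streamLease, done <-chan struct{}) {
	ticker := time.NewTicker(200 * time.Millisecond)
	defer ticker.Stop()
	var leases []*streamLease
	for {
		select {
		case l := <-ch:
			leases = append(leases, l)
		case now := <-ticker.C:
			// Cancel expired streams and keep the rest for the next tick.
			kept := leases[:0]
			for _, l := range leases {
				if now.After(l.deadline) {
					l.cancel()
				} else {
					kept = append(kept, l)
				}
			}
			leases = kept
		case <-done:
			return
		}
	}
}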

func (c *rpcClient) getBatchCopStreamResponse(ctx context.Context, client einsteindbpb.EinsteinDBClient, req *einsteindbrpc.Request, timeout time.Duration, connArray *connArray) (*einsteindbrpc.Response, error) {
	// Interlock (coprocessor) batch streaming request.
	// Use the context to support timeout for the gRPC streaming client.
	ctx1, cancel := context.WithCancel(ctx)
	// Do NOT `defer cancel()` here, because that would cancel further stream.Recv() calls.
	// Instead, cancel is stored in copStream.Lease.Cancel and invoked by copStream.Close.
	// TODO: add unit test for SendRequest.
	resp, err := einsteindbrpc.CallRPC(ctx1, client, req)
	if err != nil {
		cancel()
		return nil, errors.Trace(err)
	}

	// Put the lease object on the timeout channel so it is checked periodically.
	copStream := resp.Resp.(*einsteindbrpc.BatchCopStreamResponse)
	copStream.Timeout = timeout
	copStream.Lease.Cancel = cancel
	connArray.streamTimeout <- &copStream.Lease

	// Read the first streaming response to fill the BatchCopStreamResponse.
	// This makes error handling much easier, because SendReq() retries on
	// region errors automatically.
	var first *interlock.BatchResponse
	first, err = copStream.Recv()
	if err != nil {
		if errors.Cause(err) != io.EOF {
			return nil, errors.Trace(err)
		}
		logutil.BgLogger().Debug("batch copstream returns nothing for the request.")
	}
	copStream.BatchResponse = first
	return resp, nil
}

func (c *rpcClient) Close() error {
	// TODO: add a unit test for calling SendRequest after Close.
	c.closeConns()
	return nil
}