github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/einsteindb/client.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

// Package einsteindb provides the TCP connection to ekvserver.
package einsteindb

import (
	"context"
	"io"
	"math"
	"runtime/trace"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	grpc_opentracing "github.com/grpc-ecosystem/go-grpc-midbseware/tracing/opentracing"
	"github.com/opentracing/opentracing-go"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/ekvproto/pkg/debugpb"
	"github.com/whtcorpsinc/ekvproto/pkg/einsteindbpb"
	"github.com/whtcorpsinc/ekvproto/pkg/interlock"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/einsteindbrpc"
	"github.com/whtcorpsinc/milevadb/config"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/keepalive"
)

// MaxRecvMsgSize sets the max gRPC message size that can be received from the server. If any
// message is larger than the current value, an error will be reported by gRPC.
var MaxRecvMsgSize = math.MaxInt64

// Timeout durations.
var (
	dialTimeout               = 5 * time.Second
	readTimeoutShort          = 20 * time.Second   // For requests that read/write several key-values.
	ReadTimeoutMedium         = 60 * time.Second   // For requests that may need to scan a region.
	ReadTimeoutLong           = 150 * time.Second  // For requests that may need to scan a region multiple times.
	ReadTimeoutUltraLong      = 3600 * time.Second // For requests that may scan many regions, e.g. for TiFlash.
	GCTimeout                 = 5 * time.Minute
	UnsafeDestroyRangeTimeout = 5 * time.Minute
	AccessLockObserverTimeout = 10 * time.Second
)

const (
	grpcInitialWindowSize     = 1 << 30
	grpcInitialConnWindowSize = 1 << 30
)

// Client is a client that sends RPC.
// It should not be used after calling Close().
type Client interface {
	// Close should release all data.
	Close() error
	// SendRequest sends a Request.
	SendRequest(ctx context.Context, addr string, req *einsteindbrpc.Request, timeout time.Duration) (*einsteindbrpc.Response, error)
}
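
// Illustrative usage sketch for Client (a sketch only: it assumes the caller has already built an
// einsteindbrpc.Request and knows the store address; `addr` and `req` below are placeholders, and
// request construction lives in the einsteindbrpc package):
//
//	cli := NewTestRPCClient(config.Security{})
//	defer cli.Close()
//	resp, err := cli.SendRequest(context.Background(), addr, req, ReadTimeoutMedium)
//	if err != nil {
//		// handle the RPC error
//	}
//	_ = resp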

type connArray struct {
	// The target host.
	target string

	index uint32
	v     []*grpc.ClientConn
	// streamTimeout binds with a background goroutine to process interlock streaming timeout.
	streamTimeout chan *einsteindbrpc.Lease
	dialTimeout   time.Duration
	// batchConn is not null when batch is enabled.
	*batchConn
	done chan struct{}
}

func newConnArray(maxSize uint, addr string, security config.Security, idleNotify *uint32, enableBatch bool, dialTimeout time.Duration) (*connArray, error) {
	a := &connArray{
		index:         0,
		v:             make([]*grpc.ClientConn, maxSize),
		streamTimeout: make(chan *einsteindbrpc.Lease, 1024),
		done:          make(chan struct{}),
		dialTimeout:   dialTimeout,
	}
	if err := a.Init(addr, security, idleNotify, enableBatch); err != nil {
		return nil, err
	}
	return a, nil
}

func (a *connArray) Init(addr string, security config.Security, idleNotify *uint32, enableBatch bool) error {
	a.target = addr

	opt := grpc.WithInsecure()
	if len(security.ClusterSSLCA) != 0 {
		tlsConfig, err := security.ToTLSConfig()
		if err != nil {
			return errors.Trace(err)
		}
		opt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
	}

	cfg := config.GetGlobalConfig()
	var (
		unaryInterceptor  grpc.UnaryClientInterceptor
		streamInterceptor grpc.StreamClientInterceptor
	)
	if cfg.OpenTracing.Enable {
		unaryInterceptor = grpc_opentracing.UnaryClientInterceptor()
		streamInterceptor = grpc_opentracing.StreamClientInterceptor()
	}

	allowBatch := (cfg.EinsteinDBClient.MaxBatchSize > 0) && enableBatch
	if allowBatch {
		a.batchConn = newBatchConn(uint(len(a.v)), cfg.EinsteinDBClient.MaxBatchSize, idleNotify)
		a.pendingRequests = metrics.EinsteinDBPendingBatchRequests.WithLabelValues(a.target)
	}
	keepAlive := cfg.EinsteinDBClient.GrpcKeepAliveTime
	keepAliveTimeout := cfg.EinsteinDBClient.GrpcKeepAliveTimeout
	for i := range a.v {
		ctx, cancel := context.WithTimeout(context.Background(), a.dialTimeout)
		conn, err := grpc.DialContext(
			ctx,
			addr,
			opt,
			grpc.WithInitialWindowSize(grpcInitialWindowSize),
			grpc.WithInitialConnWindowSize(grpcInitialConnWindowSize),
			grpc.WithUnaryInterceptor(unaryInterceptor),
			grpc.WithStreamInterceptor(streamInterceptor),
			grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(MaxRecvMsgSize)),
			grpc.WithConnectParams(grpc.ConnectParams{
				Backoff: backoff.Config{
					BaseDelay:  100 * time.Millisecond, // Default was 1s.
					Multiplier: 1.6,                    // Default
					Jitter:     0.2,                    // Default
					MaxDelay:   3 * time.Second,        // Default was 120s.
				},
				MinConnectTimeout: a.dialTimeout,
			}),
			grpc.WithKeepaliveParams(keepalive.ClientParameters{
				Time:                time.Duration(keepAlive) * time.Second,
				Timeout:             time.Duration(keepAliveTimeout) * time.Second,
				PermitWithoutStream: true,
			}),
		)
		cancel()
		if err != nil {
			// Clean up if the initialization fails.
			a.Close()
			return errors.Trace(err)
		}
		a.v[i] = conn

		if allowBatch {
			batchClient := &batchCommandsClient{
				target:              a.target,
				conn:                conn,
				batched:             sync.Map{},
				idAlloc:             0,
				closed:              0,
				einsteindbClientCfg: cfg.EinsteinDBClient,
				einsteindbLoad:      &a.einsteindbTransportLayerLoad,
				dialTimeout:         a.dialTimeout,
			}
			a.batchCommandsClients = append(a.batchCommandsClients, batchClient)
		}
	}
	go einsteindbrpc.CheckStreamTimeoutLoop(a.streamTimeout, a.done)
	if allowBatch {
		go a.batchSendLoop(cfg.EinsteinDBClient)
	}

	return nil
}

func (a *connArray) Get() *grpc.ClientConn {
	next := atomic.AddUint32(&a.index, 1) % uint32(len(a.v))
	return a.v[next]
}

func (a *connArray) Close() {
	if a.batchConn != nil {
		a.batchConn.Close()
	}

	for i, c := range a.v {
		if c != nil {
			err := c.Close()
			terror.Log(errors.Trace(err))
			a.v[i] = nil
		}
	}

	close(a.done)
}

// rpcClient is the RPC client struct.
// TODO: Add flow control between RPC clients in MilevaDB and RPC servers in EinsteinDB.
// Since we use a shared client connection to communicate with the same EinsteinDB, it's possible
// that too many concurrent requests overload the EinsteinDB service.
type rpcClient struct {
	sync.RWMutex

	conns    map[string]*connArray
	security config.Security

	idleNotify uint32
	// Periodically check whether any connection is idle, then close and remove such connections
	// as background cleanup.
	isClosed    bool
	dialTimeout time.Duration
}

func newRPCClient(security config.Security, opts ...func(c *rpcClient)) *rpcClient {
	cli := &rpcClient{
		conns:       make(map[string]*connArray),
		security:    security,
		dialTimeout: dialTimeout,
	}
	for _, opt := range opts {
		opt(cli)
	}
	return cli
}

// NewTestRPCClient is for some external tests.
func NewTestRPCClient(security config.Security) Client {
	return newRPCClient(security)
}
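
// Sketch of the functional-options parameter on newRPCClient: each option receives the client
// after its defaults are set, so package-internal callers can tweak fields such as dialTimeout.
// The value below is only an example:
//
//	cli := newRPCClient(config.Security{}, func(c *rpcClient) {
//		c.dialTimeout = 10 * time.Second
//	})
//	defer cli.Close()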

func (c *rpcClient) getConnArray(addr string, enableBatch bool, opt ...func(cfg *config.EinsteinDBClient)) (*connArray, error) {
	c.RLock()
	if c.isClosed {
		c.RUnlock()
		return nil, errors.Errorf("rpcClient is closed")
	}
	array, ok := c.conns[addr]
	c.RUnlock()
	if !ok {
		var err error
		array, err = c.createConnArray(addr, enableBatch, opt...)
		if err != nil {
			return nil, err
		}
	}
	return array, nil
}

func (c *rpcClient) createConnArray(addr string, enableBatch bool, opts ...func(cfg *config.EinsteinDBClient)) (*connArray, error) {
	c.Lock()
	defer c.Unlock()
	array, ok := c.conns[addr]
	if !ok {
		var err error
		client := config.GetGlobalConfig().EinsteinDBClient
		for _, opt := range opts {
			opt(&client)
		}
		array, err = newConnArray(client.GrpcConnectionCount, addr, c.security, &c.idleNotify, enableBatch, c.dialTimeout)
		if err != nil {
			return nil, err
		}
		c.conns[addr] = array
	}
	return array, nil
}

func (c *rpcClient) closeConns() {
	c.Lock()
	if !c.isClosed {
		c.isClosed = true
		// Close all connections.
		for _, array := range c.conns {
			array.Close()
		}
	}
	c.Unlock()
}

var sendReqHistCache sync.Map

type sendReqHistCacheKey struct {
	tp einsteindbrpc.CmdType
	id uint64
}

func (c *rpcClient) uFIDelateEinsteinDBSendReqHistogram(req *einsteindbrpc.Request, start time.Time) {
	key := sendReqHistCacheKey{
		req.Type,
		req.Context.GetPeer().GetStoreId(),
	}

	v, ok := sendReqHistCache.Load(key)
	if !ok {
		reqType := req.Type.String()
		storeID := strconv.FormatUint(req.Context.GetPeer().GetStoreId(), 10)
		v = metrics.EinsteinDBSendReqHistogram.WithLabelValues(reqType, storeID)
		sendReqHistCache.Store(key, v)
	}

	v.(prometheus.Observer).Observe(time.Since(start).Seconds())
}

// SendRequest sends a Request to the server and receives a Response.
func (c *rpcClient) SendRequest(ctx context.Context, addr string, req *einsteindbrpc.Request, timeout time.Duration) (*einsteindbrpc.Response, error) {
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("rpcClient.SendRequest", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}

	start := time.Now()
	defer func() {
		stmtInterDirc := ctx.Value(execdetails.StmtInterDircDetailKey)
		if stmtInterDirc != nil {
			detail := stmtInterDirc.(*execdetails.StmtInterDircDetails)
			atomic.AddInt64(&detail.WaitKVResFIDeluration, int64(time.Since(start)))
		}
		c.uFIDelateEinsteinDBSendReqHistogram(req, start)
	}()

	if atomic.CompareAndSwapUint32(&c.idleNotify, 1, 0) {
		c.recycleIdleConnArray()
	}

	// MilevaDB will not send batch commands to TiFlash, to resolve the conflict with Batch Causet Request.
	enableBatch := req.StoreTp != ekv.MilevaDB && req.StoreTp != ekv.TiFlash
	connArray, err := c.getConnArray(addr, enableBatch)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// The MilevaDB RPC server supports batch RPC, but a batch connection sends heartbeats, which is
	// unnecessary because requests to MilevaDB are not high frequency.
	if config.GetGlobalConfig().EinsteinDBClient.MaxBatchSize > 0 && enableBatch {
		if batchReq := req.ToBatchCommandsRequest(); batchReq != nil {
			defer trace.StartRegion(ctx, req.Type.String()).End()
			return sendBatchRequest(ctx, addr, connArray.batchConn, batchReq, timeout)
		}
	}

	clientConn := connArray.Get()
	if state := clientConn.GetState(); state == connectivity.TransientFailure {
		storeID := strconv.FormatUint(req.Context.GetPeer().GetStoreId(), 10)
		metrics.GRPCConnTransientFailureCounter.WithLabelValues(addr, storeID).Inc()
	}

	if req.IsDebugReq() {
		client := debugpb.NewDebugClient(clientConn)
		ctx1, cancel := context.WithTimeout(ctx, timeout)
		defer cancel()
		return einsteindbrpc.CallDebugRPC(ctx1, client, req)
	}

	client := einsteindbpb.NewEinsteinDBClient(clientConn)

	if req.Type == einsteindbrpc.CmdBatchCop {
		return c.getBatchCopStreamResponse(ctx, client, req, timeout, connArray)
	}

	if req.Type == einsteindbrpc.CmdCopStream {
		return c.getCopStreamResponse(ctx, client, req, timeout, connArray)
	}
	ctx1, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	return einsteindbrpc.CallRPC(ctx1, client, req)
}

func (c *rpcClient) getCopStreamResponse(ctx context.Context, client einsteindbpb.EinsteinDBClient, req *einsteindbrpc.Request, timeout time.Duration, connArray *connArray) (*einsteindbrpc.Response, error) {
	// Coprocessor streaming request.
	// Use a context to support timeout for the gRPC streaming client.
	ctx1, cancel := context.WithCancel(ctx)
	// Should NOT call `defer cancel()` here, because it would cancel further stream.Recv() calls.
	// Instead, cancel is stored in copStream.Lease.Cancel and is called at copStream.Close.
	// TODO: add unit test for SendRequest.
	resp, err := einsteindbrpc.CallRPC(ctx1, client, req)
	if err != nil {
		cancel()
		return nil, errors.Trace(err)
	}

	// Put the lease object into the timeout channel, so it is checked periodically.
	copStream := resp.Resp.(*einsteindbrpc.CopStreamResponse)
	copStream.Timeout = timeout
	copStream.Lease.Cancel = cancel
	connArray.streamTimeout <- &copStream.Lease

	// Read the first streaming response to fill the CopStreamResponse.
	// This makes error handling much easier, because SendReq() retries on
	// region errors automatically.
	var first *interlock.Response
	first, err = copStream.Recv()
	if err != nil {
		if errors.Cause(err) != io.EOF {
			return nil, errors.Trace(err)
		}
		logutil.BgLogger().Debug("copstream returns nothing for the request.")
	}
	copStream.Response = first
	return resp, nil
}
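
// Both stream helpers (above and below) share the same lease pattern: the cancel function from
// context.WithCancel is intentionally not deferred, but stored in the stream's Lease and sent to
// connArray.streamTimeout, so the CheckStreamTimeoutLoop goroutine started in connArray.Init can
// cancel streams that outlive their Timeout while Recv() stays usable after SendRequest returns.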

func (c *rpcClient) getBatchCopStreamResponse(ctx context.Context, client einsteindbpb.EinsteinDBClient, req *einsteindbrpc.Request, timeout time.Duration, connArray *connArray) (*einsteindbrpc.Response, error) {
	// Coprocessor streaming request.
	// Use a context to support timeout for the gRPC streaming client.
	ctx1, cancel := context.WithCancel(ctx)
	// Should NOT call `defer cancel()` here, because it would cancel further stream.Recv() calls.
	// Instead, cancel is stored in copStream.Lease.Cancel and is called at copStream.Close.
	// TODO: add unit test for SendRequest.
	resp, err := einsteindbrpc.CallRPC(ctx1, client, req)
	if err != nil {
		cancel()
		return nil, errors.Trace(err)
	}

	// Put the lease object into the timeout channel, so it is checked periodically.
	copStream := resp.Resp.(*einsteindbrpc.BatchCopStreamResponse)
	copStream.Timeout = timeout
	copStream.Lease.Cancel = cancel
	connArray.streamTimeout <- &copStream.Lease

	// Read the first streaming response to fill the BatchCopStreamResponse.
	// This makes error handling much easier, because SendReq() retries on
	// region errors automatically.
	var first *interlock.BatchResponse
	first, err = copStream.Recv()
	if err != nil {
		if errors.Cause(err) != io.EOF {
			return nil, errors.Trace(err)
		}
		logutil.BgLogger().Debug("batch copstream returns nothing for the request.")
	}
	copStream.BatchResponse = first
	return resp, nil
}

func (c *rpcClient) Close() error {
	// TODO: add a unit test for SendRequest after Close.
	c.closeConns()
	return nil
}