github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/kv/grpc_pool_impl.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package kv

import (
	"context"
	"sync"
	"time"

	"github.com/pingcap/log"
	cerror "github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/security"
	"go.uber.org/zap"
	"google.golang.org/grpc"
	gbackoff "google.golang.org/grpc/backoff"
	"google.golang.org/grpc/keepalive"
)

const (
	// grpcConnCapacity is the maximum number of streams multiplexed over one
	// shared connection. The default max number of TiKV concurrent streams in
	// each connection is 1024, so 1000 keeps each conn below that limit.
	grpcConnCapacity = 1000

	// resizeBucketStep is how many connections are added when resizing a
	// conn array
	resizeBucketStep = 2

	updateMetricInterval = 1 * time.Minute
	recycleConnInterval  = 10 * time.Minute
)

// connArray is an array of sharedConns for one TiKV storage address
type connArray struct {
	// target is the TiKV storage address
	target string

	mu    sync.Mutex
	conns []*sharedConn

	// next is used for fetching sharedConn in a round-robin way
	next int
}

func newConnArray(target string) *connArray {
	return &connArray{target: target}
}

// resize grows the conn array by `size` newly dialed connections
func (ca *connArray) resize(ctx context.Context, credential *security.Credential, size int) error {
	conns := make([]*sharedConn, 0, size)
	for i := 0; i < size; i++ {
		conn, err := createClientConn(ctx, credential, ca.target)
		if err != nil {
			return err
		}
		conns = append(conns, &sharedConn{ClientConn: conn, active: 0})
	}
	ca.conns = append(ca.conns, conns...)
	return nil
}

func createClientConn(ctx context.Context, credential *security.Credential, target string) (*grpc.ClientConn, error) {
	grpcTLSOption, err := credential.ToGRPCDialOption()
	if err != nil {
		return nil, err
	}
	ctx, cancel := context.WithTimeout(ctx, dialTimeout)
	defer cancel()

	conn, err := grpc.DialContext(
		ctx,
		target,
		grpcTLSOption,
		grpc.WithInitialWindowSize(grpcInitialWindowSize),
		grpc.WithInitialConnWindowSize(grpcInitialConnWindowSize),
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(grpcMaxCallRecvMsgSize)),
		grpc.WithUnaryInterceptor(grpcMetrics.UnaryClientInterceptor()),
		grpc.WithStreamInterceptor(grpcMetrics.StreamClientInterceptor()),
		// Reconnect with a gentle exponential backoff, capped at 3s between
		// attempts.
		grpc.WithConnectParams(grpc.ConnectParams{
			Backoff: gbackoff.Config{
				BaseDelay:  time.Second,
				Multiplier: 1.1,
				Jitter:     0.1,
				MaxDelay:   3 * time.Second,
			},
			MinConnectTimeout: 3 * time.Second,
		}),
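		// Send keepalive pings every 10s, even with no open stream, so that
		// dead connections are detected and torn down quickly.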
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			Time:                10 * time.Second,
			Timeout:             3 * time.Second,
			PermitWithoutStream: true,
		}),
	)
	if err != nil {
		return nil, cerror.WrapError(cerror.ErrGRPCDialFailed, err)
	}
	return conn, nil
}

// getNext gets the next available sharedConn; if every conn is at capacity,
// it grows the connArray by resizeBucketStep.
func (ca *connArray) getNext(ctx context.Context, credential *security.Credential) (*sharedConn, error) {
	ca.mu.Lock()
	defer ca.mu.Unlock()

	if len(ca.conns) == 0 {
		err := ca.resize(ctx, credential, resizeBucketStep)
		if err != nil {
			return nil, err
		}
	}
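	// Scan every conn once, starting from the round-robin cursor, and hand
	// out the first one that still has stream capacity.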
	for current := ca.next; current < ca.next+len(ca.conns); current++ {
		conn := ca.conns[current%len(ca.conns)]
		if conn.active < grpcConnCapacity {
			conn.active++
			ca.next = (current + 1) % len(ca.conns)
			return conn, nil
		}
	}

	current := len(ca.conns)
	// No conn has spare capacity; grow the connArray by resizeBucketStep and
	// hand out the first newly created conn.
	err := ca.resize(ctx, credential, resizeBucketStep)
	if err != nil {
		return nil, err
	}
	ca.conns[current].active++
	ca.next = current + 1
	return ca.conns[current], nil
}

// recycle removes idle sharedConns and returns true if no active gRPC
// connections remain.
func (ca *connArray) recycle() (empty bool) {
	ca.mu.Lock()
	defer ca.mu.Unlock()
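	// Compact ca.conns in place: keep conns that still serve active streams
	// and close the idle ones.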
	i := 0
	for _, conn := range ca.conns {
		if conn.active > 0 {
			ca.conns[i] = conn
			i++
		} else {
			// Tear down this grpc.ClientConn; we don't use it anymore, so a
			// non-nil error returned by Close can be ignored.
			_ = conn.Close()
		}
	}
	// Erase the truncated tail so the dropped conns can be garbage collected.
	for j := i; j < len(ca.conns); j++ {
		ca.conns[j] = nil
	}
	ca.conns = ca.conns[:i]
	return len(ca.conns) == 0
}

func (ca *connArray) activeCount() (count int64) {
	ca.mu.Lock()
	defer ca.mu.Unlock()
	for _, conn := range ca.conns {
		count += conn.active
	}
	return
}

// close tears down all ClientConns maintained in the connArray
func (ca *connArray) close() {
	ca.mu.Lock()
	defer ca.mu.Unlock()
	for _, conn := range ca.conns {
		// Tear down this grpc.ClientConn; we don't use it anymore, so a
		// non-nil error returned by Close can be ignored.
		_ = conn.Close()
	}
}

// GrpcPoolImpl implements the GrpcPool interface
type GrpcPoolImpl struct {
	poolMu sync.RWMutex
	// bucketConns maps from TiKV store address to a connArray, which stores
	// a slice of gRPC connections.
	bucketConns map[string]*connArray

	credential *security.Credential

	// lifecycles of all gRPC connections are bound to this context
	ctx context.Context
}

// NewGrpcPoolImpl creates a new GrpcPoolImpl instance.
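//
// A minimal usage sketch (the TiKV address below is illustrative):
//
//	pool := NewGrpcPoolImpl(ctx, &security.Credential{})
//	go pool.RecycleConn(ctx) // drop idle conns and report metrics in background
//	sc, err := pool.GetConn("127.0.0.1:20160")
//	if err != nil {
//		return err
//	}
//	defer pool.ReleaseConn(sc, "127.0.0.1:20160")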
func NewGrpcPoolImpl(ctx context.Context, credential *security.Credential) *GrpcPoolImpl {
	return &GrpcPoolImpl{
		credential:  credential,
		bucketConns: make(map[string]*connArray),
		ctx:         ctx,
	}
}

// GetConn implements GrpcPool.GetConn. It takes the write lock because it may
// need to create a new connArray for addr.
func (pool *GrpcPoolImpl) GetConn(addr string) (*sharedConn, error) {
	pool.poolMu.Lock()
	defer pool.poolMu.Unlock()
	if _, ok := pool.bucketConns[addr]; !ok {
		pool.bucketConns[addr] = newConnArray(addr)
	}
	return pool.bucketConns[addr].getNext(pool.ctx, pool.credential)
}

// ReleaseConn implements GrpcPool.ReleaseConn. Each successful GetConn should
// eventually be paired with one ReleaseConn so the active-stream count stays
// accurate.
func (pool *GrpcPoolImpl) ReleaseConn(sc *sharedConn, addr string) {
	pool.poolMu.RLock()
	defer pool.poolMu.RUnlock()
	if bucket, ok := pool.bucketConns[addr]; !ok {
		log.Warn("resource is not found in grpc pool", zap.String("addr", addr))
	} else {
		// sc.active is protected by the owning bucket's mutex, not by poolMu.
		bucket.mu.Lock()
		sc.active--
		bucket.mu.Unlock()
	}
}

// RecycleConn implements GrpcPool.RecycleConn. It runs until ctx is canceled,
// periodically dropping connArrays with no active streams and refreshing the
// per-address stream gauge.
func (pool *GrpcPoolImpl) RecycleConn(ctx context.Context) {
	recycleTicker := time.NewTicker(recycleConnInterval)
	defer recycleTicker.Stop()
	metricTicker := time.NewTicker(updateMetricInterval)
	defer metricTicker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-recycleTicker.C:
			pool.poolMu.Lock()
			for addr, bucket := range pool.bucketConns {
				empty := bucket.recycle()
				if empty {
					log.Info("recycle connections in grpc pool", zap.String("address", addr))
					delete(pool.bucketConns, addr)
					grpcPoolStreamGauge.DeleteLabelValues(addr)
				}
			}
			pool.poolMu.Unlock()
		case <-metricTicker.C:
			pool.poolMu.RLock()
			for addr, bucket := range pool.bucketConns {
				grpcPoolStreamGauge.WithLabelValues(addr).Set(float64(bucket.activeCount()))
			}
			pool.poolMu.RUnlock()
		}
	}
}

// Close implements GrpcPool.Close
func (pool *GrpcPoolImpl) Close() {
	pool.poolMu.Lock()
	defer pool.poolMu.Unlock()
	for _, bucket := range pool.bucketConns {
		bucket.close()
	}
}