github.com/polarismesh/polaris@v1.17.8/common/redispool/redis_pool.go

/**
 * Tencent is pleased to support the open source community by making Polaris available.
 *
 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
 *
 * Licensed under the BSD 3-Clause License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://opensource.org/licenses/BSD-3-Clause
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

package redispool

import (
	"context"
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"time"

	"github.com/go-redis/redis/v8"

	"github.com/polarismesh/polaris/common/log"
	"github.com/polarismesh/polaris/common/metrics"
	"github.com/polarismesh/polaris/plugin"
)

const (
	// Get get method define
	Get = iota
	// Set set method define
	Set
	// Del del method define
	Del
	// Sadd sadd method define
	Sadd
	// Srem srem method define
	Srem
)

var (
	typeToCommand = map[int]string{
		Get:  "GET",
		Set:  "SET",
		Del:  "DEL",
		Sadd: "SADD",
		Srem: "SREM",
	}
)

const (
	// keyPrefix the prefix for hb (heartbeat) key
	keyPrefix = "hb_"
)

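// toRedisKey builds the storage key for an instance id: in the default mode the id
// is prefixed with "hb_" (for example, "abc-123" becomes "hb_abc-123"), while
// compatible mode keeps the raw id, presumably so data written before the prefix
// was introduced stays readable.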
func toRedisKey(instanceID string, compatible bool) string {
	if compatible {
		return instanceID
	}
	return fmt.Sprintf("%s%s", keyPrefix, instanceID)
}

// Task ckv task request struct
type Task struct {
	taskType int
	id       string
	value    string
	members  []string
	respChan chan *Resp
}

// String returns a readable representation of the task
func (t Task) String() string {
	return fmt.Sprintf("{taskType: %s, id: %s}", typeToCommand[t.taskType], t.id)
}

// redisPool ckv connection pool struct
type redisPool struct {
	config         *Config
	ctx            context.Context
	redisClient    redis.UniversalClient
	redisDead      uint32
	recoverTimeSec int64
	statis         plugin.Statis
	taskChans      []chan *Task
}

// NewRedisClient creates a new redis client
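// The configured deploy mode selects the client flavor: sentinel builds a failover
// client, cluster a cluster client, and standalone (or any unrecognized mode) falls
// back to a plain single-node client.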
func NewRedisClient(config *Config, opts ...Option) redis.UniversalClient {
	if config == nil {
		config = DefaultConfig()
	}
	for _, o := range opts {
		o(config)
	}
	var redisClient redis.UniversalClient
	switch config.DeployMode {
	case redisSentinel:
		redisClient = redis.NewFailoverClient(config.FailOverOptions())
	case redisCluster:
		redisClient = redis.NewClusterClient(config.ClusterOptions())
	case redisStandalone:
		redisClient = redis.NewClient(config.StandaloneOptions())
	default:
		redisClient = redis.NewClient(config.StandaloneOptions())
	}
	return redisClient
}

// NewRedisPool initializes a redis connection pool instance
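//
// A minimal usage sketch (hypothetical wiring; ctx, config and the statis plugin
// come from the caller):
//
//	pool := NewRedisPool(ctx, config, statis)
//	pool.Start()
//	resp := pool.Get("instance-id")
//	if resp.Err != nil {
//		// fall back or report the error
//	}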
func NewRedisPool(ctx context.Context, config *Config, statis plugin.Statis, opts ...Option) Pool {
	if config.WriteTimeout == 0 {
		config.WriteTimeout = config.MsgTimeout
	}

	if config.ReadTimeout == 0 {
		config.ReadTimeout = config.MsgTimeout
	}

	redisClient := NewRedisClient(config, opts...)
	pool := &redisPool{
		config:         config,
		ctx:            ctx,
		redisClient:    redisClient,
		recoverTimeSec: time.Now().Unix(),
		statis:         statis,
		taskChans:      make([]chan *Task, 0, config.Concurrency),
	}

	for i := 0; i < config.Concurrency; i++ {
		pool.taskChans = append(pool.taskChans, make(chan *Task, 1024))
	}
	return pool
}

// Get sends a GET request to redis through the connection pool
func (p *redisPool) Get(id string) *Resp {
	if err := p.checkRedisDead(); err != nil {
		return &Resp{Err: err}
	}
	task := &Task{
		taskType: Get,
		id:       id,
	}
	return p.handleTask(task)
}

// Sdd sends an SADD request to redis through the connection pool
func (p *redisPool) Sdd(id string, members []string) *Resp {
	if err := p.checkRedisDead(); err != nil {
		return &Resp{Err: err}
	}
	task := &Task{
		taskType: Sadd,
		id:       id,
		members:  members,
	}
	return p.handleTaskWithRetries(task)
}

// Srem sends an SREM request to redis through the connection pool
func (p *redisPool) Srem(id string, members []string) *Resp {
	if err := p.checkRedisDead(); err != nil {
		return &Resp{Err: err}
	}
	task := &Task{
		taskType: Srem,
		id:       id,
		members:  members,
	}
	return p.handleTaskWithRetries(task)
}

// Set sends a SET request to redis through the connection pool
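//
// A minimal caller-side sketch (hypothetical type; RedisObject is defined elsewhere
// in this package, and as used here it needs a Serialize(compatible bool) string method):
//
//	type beatRecord struct{ beatTime int64 }
//
//	func (r beatRecord) Serialize(compatible bool) string {
//		return fmt.Sprintf("%d", r.beatTime)
//	}
//
//	resp := pool.Set(instanceID, beatRecord{beatTime: time.Now().Unix()})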
func (p *redisPool) Set(id string, redisObj RedisObject) *Resp {
	if err := p.checkRedisDead(); err != nil {
		return &Resp{Err: err}
	}
	task := &Task{
		taskType: Set,
		id:       id,
		value:    redisObj.Serialize(p.config.Compatible),
	}
	return p.handleTaskWithRetries(task)
}

// Del sends a DEL request to redis through the connection pool
func (p *redisPool) Del(id string) *Resp {
	if err := p.checkRedisDead(); err != nil {
		return &Resp{Err: err}
	}
	task := &Task{
		taskType: Del,
		id:       id,
	}
	return p.handleTaskWithRetries(task)
}

func (p *redisPool) checkRedisDead() error {
	if atomic.LoadUint32(&p.redisDead) == 1 {
		return fmt.Errorf("redis %s is dead", p.config.KvAddr)
	}
	return nil
}

// Start starts the ckv connection pool workers
func (p *redisPool) Start() {
	wg := &sync.WaitGroup{}
	wg.Add(p.config.Concurrency)
	p.startWorkers(wg)
	go p.checkRedis(wg)
	log.Infof("[RedisPool]redis pool started")
}

func (p *redisPool) startWorkers(wg *sync.WaitGroup) {
	for i := 0; i < p.config.Concurrency; i++ {
		go p.process(wg, i)
	}
}

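// process drains one task channel: tasks are buffered in memory and flushed to a
// redis pipeline either once MinBatchCount tasks have accumulated or when the
// WaitTime ticker fires, so a single round trip can carry many commands.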
func (p *redisPool) process(wg *sync.WaitGroup, idx int) {
	log.Infof("[RedisPool]redis worker %d started", idx)
	ticker := time.NewTicker(p.config.WaitTime)
	piper := p.redisClient.Pipeline()
	defer func() {
		ticker.Stop()
		_ = piper.Close()
		wg.Done()
	}()
	var tasks []*Task
	for {
		select {
		case task := <-p.taskChans[idx]:
			tasks = append(tasks, task)
			if len(tasks) >= p.config.MinBatchCount {
				p.handleTasks(tasks, piper)
				tasks = nil
			}
		case <-ticker.C:
			if len(tasks) > 0 {
				p.handleTasks(tasks, piper)
				tasks = nil
			}
		case <-p.ctx.Done():
			return
		}
	}
}

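// handleTasks queues every task on the pipeline, executes the pipeline once, and
// then maps each command result back onto its task's respChan; a redis.Nil result
// on GET is reported as Exists=false rather than as an error.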
func (p *redisPool) handleTasks(tasks []*Task, piper redis.Pipeliner) {
	cmders := make([]redis.Cmder, len(tasks))
	for i, task := range tasks {
		cmders[i] = p.doHandleTask(task, piper)
	}
	_, _ = piper.Exec(context.Background())
	for i, cmder := range cmders {
		func(idx int, cmd redis.Cmder) {
			var resp = &Resp{}
			task := tasks[idx]
			defer func() {
				task.respChan <- resp
			}()
			switch typedCmd := cmd.(type) {
			case *redis.StringCmd:
				resp.Value, resp.Err = typedCmd.Result()
				resp.Exists = true
				if resp.Err == redis.Nil {
					resp.Err = nil
					resp.Exists = false
				}
			case *redis.StatusCmd:
				_, resp.Err = typedCmd.Result()
			case *redis.IntCmd:
				_, resp.Err = typedCmd.Result()
			default:
				resp.Err = fmt.Errorf("unknown type %s for task %s", typedCmd, *task)
			}
		}(i, cmder)
	}
}

const (
	redisCheckInterval = 1 * time.Second
	errCountThreshold  = 2
	maxCheckCount      = 3
	retryBackoff       = 30 * time.Millisecond
)

func sleep(dur time.Duration) {
	t := time.NewTimer(dur)
	defer t.Stop()

	<-t.C
}

// checkRedis periodically checks whether redis is alive
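// Each tick runs maxCheckCount pings; if at least errCountThreshold of them fail,
// the pool is marked dead so reads and writes fail fast, and once the pings succeed
// again the pool is marked alive and the recovery time is recorded.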
func (p *redisPool) checkRedis(wg *sync.WaitGroup) {
	ticker := time.NewTicker(redisCheckInterval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			var errCount int
			for i := 0; i < maxCheckCount; i++ {
				if !p.doCheckRedis() {
					errCount++
				}
			}
			if errCount >= errCountThreshold {
				metrics.ReportRedisIsDead()
				if atomic.CompareAndSwapUint32(&p.redisDead, 0, 1) {
					atomic.StoreInt64(&p.recoverTimeSec, 0)
				}
			} else {
				metrics.ReportRedisIsAlive()
				if atomic.CompareAndSwapUint32(&p.redisDead, 1, 0) {
					atomic.StoreInt64(&p.recoverTimeSec, time.Now().Unix())
				}
			}
		case <-p.ctx.Done():
			wg.Wait()
			_ = p.redisClient.Close()
			return
		}
	}
}

// RecoverTimeSec returns the unix-second timestamp recorded when redis last recovered
func (p *redisPool) RecoverTimeSec() int64 {
	return atomic.LoadInt64(&p.recoverTimeSec)
}

// doCheckRedis tests the connection with a PING
func (p *redisPool) doCheckRedis() bool {
	_, err := p.redisClient.Ping(context.Background()).Result()

	return err == nil
}

const (
	maxProcessDuration = 1000 * time.Millisecond
)

var indexer int64

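// nextIndex returns a monotonically increasing counter used to spread tasks across
// the worker channels in round-robin fashion; it is reset once it reaches
// math.MaxInt64 so the index used in handleTask stays non-negative.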
func nextIndex() int64 {
	value := atomic.AddInt64(&indexer, 1)
	if value == math.MaxInt64 {
		atomic.CompareAndSwapInt64(&indexer, value, 0)
		value = atomic.AddInt64(&indexer, 1)
	}
	return value
}

// handleTaskWithRetries executes a task with retries
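// Only responses flagged shouldRetry are retried, with retryBackoff between
// attempts; the total number of attempts is 1 plus the configured MaxRetry.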
func (p *redisPool) handleTaskWithRetries(task *Task) *Resp {
	var count = 1
	if p.config.MaxRetry > 0 {
		count += p.config.MaxRetry
	}
	var resp *Resp
	for i := 0; i < count; i++ {
		if i > 0 {
			sleep(retryBackoff)
		}
		resp = p.handleTask(task)
		if resp.Err == nil || !resp.shouldRetry {
			break
		}
		log.Errorf("[RedisPool] fail to handle task %s, retry count %d, err is %v", *task, i, resp.Err)
	}
	return resp
}

// handleTask dispatches a task to a worker and waits for the response
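// The task is routed to a worker channel chosen round-robin via nextIndex, and the
// caller blocks on respChan until the pipelined command completes or the pool
// context is cancelled.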
func (p *redisPool) handleTask(task *Task) *Resp {
	var startTime = time.Now()
	task.respChan = make(chan *Resp, 1)
	idx := int(nextIndex()) % len(p.taskChans)
	select {
	case p.taskChans[idx] <- task:
	case <-p.ctx.Done():
		return &Resp{Err: fmt.Errorf("worker has been stopped while scheduling task %s", *task),
			Compatible: p.config.Compatible, shouldRetry: false}
	}
	var resp *Resp
	select {
	case resp = <-task.respChan:
	case <-p.ctx.Done():
		return &Resp{Err: fmt.Errorf("worker has been stopped while fetching resp for task %s", *task),
			Compatible: p.config.Compatible, shouldRetry: false}
	}
	resp.Compatible = p.config.Compatible
	resp.shouldRetry = true
	p.afterHandleTask(startTime, typeToCommand[task.taskType], task, resp)
	return resp
}

const (
	callResultOk   = 0
	callResultFail = 1
)

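// afterHandleTask logs non-GET tasks that exceed maxProcessDuration, reports
// read/write failure metrics on error, and records a per-command call metric for
// the statis plugin.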
func (p *redisPool) afterHandleTask(startTime time.Time, command string, task *Task, resp *Resp) {
	costDuration := time.Since(startTime)
	if costDuration >= maxProcessDuration && task.taskType != Get {
		log.Warnf("[RedisPool] too slow to process task %s, "+
			"duration %s, greater than %s", task.String(), costDuration, maxProcessDuration)
	}
	code := callResultOk
	if resp.Err != nil {
		code = callResultFail
		switch task.taskType {
		case Set, Del, Sadd, Srem:
			metrics.ReportRedisWriteFailure()
		default:
			metrics.ReportRedisReadFailure()
		}
	}
	p.statis.ReportCallMetrics(metrics.CallMetric{
		Type:     metrics.RedisCallMetric,
		API:      command,
		Code:     int(code),
		Duration: costDuration,
	})
}

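// doHandleTask queues one command on the pipeline. SET, DEL and GET go through
// toRedisKey (so the hb_ prefix applies unless compatible mode is on), while SADD
// and SREM use the raw id as the set key.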
func (p *redisPool) doHandleTask(task *Task, piper redis.Pipeliner) redis.Cmder {
	switch task.taskType {
	case Set:
		return piper.Set(context.Background(), toRedisKey(task.id, p.config.Compatible), task.value, 0)
	case Del:
		return piper.Del(context.Background(), toRedisKey(task.id, p.config.Compatible))
	case Sadd:
		return piper.SAdd(context.Background(), task.id, task.members)
	case Srem:
		return piper.SRem(context.Background(), task.id, task.members)
	default:
		return piper.Get(context.Background(), toRedisKey(task.id, p.config.Compatible))
	}
}