github.com/polarismesh/polaris@v1.17.8/plugin/healthchecker/leader/peer.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package leader
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"math/rand"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	apiservice "github.com/polarismesh/specification/source/go/api/v1/service_manage"
    30  	"go.uber.org/zap"
    31  	"google.golang.org/grpc"
    32  	"google.golang.org/grpc/metadata"
    33  
    34  	"github.com/polarismesh/polaris/common/batchjob"
    35  	commonhash "github.com/polarismesh/polaris/common/hash"
    36  	"github.com/polarismesh/polaris/common/utils"
    37  )
    38  
// Constructor hooks for the two Peer implementations in this file. They are
// package-level variables, so they can be reassigned to substitute another
// Peer implementation.
var (
	// NewLocalPeerFunc creates a Peer; it defaults to newLocalPeer.
	NewLocalPeerFunc  = newLocalPeer
	// NewRemotePeerFunc creates a Peer; it defaults to newRemotePeer.
	NewRemotePeerFunc = newRemotePeer
)
    43  
// newLocalPeer returns a zero-value LocalPeer as a Peer. The cache is not
// created here; Initialize must be called before the peer stores records.
func newLocalPeer() Peer {
	return &LocalPeer{}
}
    47  
// newRemotePeer returns a zero-value RemotePeer as a Peer. Connections, streams
// and the cache are only set up by Serve.
func newRemotePeer() Peer {
	return &RemotePeer{}
}
    51  
// Peer abstracts a node that stores heartbeat (beat) records. This file provides
// two implementations: LocalPeer and RemotePeer.
type Peer interface {
	// Initialize hands the peer its configuration.
	Initialize(conf Config)
	// Serve starts the peer for the given checker and listen address.
	Serve(ctx context.Context, checker *LeaderHealthChecker, listenIP string, listenPort uint32) error
	// Get returns the beat record stored under key.
	Get(key string) (*ReadBeatRecord, error)
	// Put stores a beat record.
	Put(record WriteBeatRecord) error
	// Del removes the beat record stored under key.
	Del(key string) error
	// Close releases the resources held by the peer.
	Close() error
	// Host returns the host of the peer.
	Host() string
	// Storage returns the BeatRecordCache backing the peer.
	Storage() BeatRecordCache
}
    71  
// LocalPeer is the heartbeat data storage node that keeps records in a cache
// owned by this process.
type LocalPeer struct {
	// once is not used by any method visible in this file.
	once sync.Once
	// Cache is the heartbeat record storage; it is created by Initialize.
	Cache BeatRecordCache
	// cancel, when non-nil, is invoked by Close. No method visible in this file
	// assigns it.
	cancel context.CancelFunc
}
    80  
// Initialize creates the local beat record cache from conf.SoltNum, using
// commonhash.Fnv32 as the hash function.
func (p *LocalPeer) Initialize(conf Config) {
	p.Cache = newLocalBeatRecordCache(conf.SoltNum, commonhash.Fnv32)
}
    84  
// Serve only logs that the local peer is serving; none of its arguments are used
// and it always returns nil.
func (p *LocalPeer) Serve(ctx context.Context, checker *LeaderHealthChecker,
	listenIP string, listenPort uint32) error {
	log.Info("[HealthCheck][Leader] local peer serve")
	return nil
}
    90  
// Host returns utils.LocalHost, the host of the local node.
func (p *LocalPeer) Host() string {
	return utils.LocalHost
}
    95  
    96  // Get get records
    97  func (p *LocalPeer) Get(key string) (*ReadBeatRecord, error) {
    98  	ret := p.Cache.Get(key)
    99  	return ret[key], nil
   100  }
   101  
// Put writes record into the local cache. The returned error is always nil.
func (p *LocalPeer) Put(record WriteBeatRecord) error {
	p.Cache.Put(record)
	return nil
}
   107  
// Del removes key from the local cache. The returned error is always nil.
func (p *LocalPeer) Del(key string) error {
	p.Cache.Del(key)
	return nil
}
   113  
   114  // Close close peer life
   115  func (p *LocalPeer) Close() error {
   116  	log.Info("[HealthCheck][Leader] local peer close")
   117  	if p.cancel != nil {
   118  		p.cancel()
   119  	}
   120  	return nil
   121  }
   122  
// Storage returns the cache backing this peer (nil until Initialize has run).
func (p *LocalPeer) Storage() BeatRecordCache {
	return p.Cache
}
   126  
// RemotePeer is the heartbeat data storage node that forwards record operations
// to another node over gRPC.
type RemotePeer struct {
	// host is the peer host, set by Serve from listenIP.
	host string
	// port is the port on which the peer provides its gRPC service, set by Serve.
	port uint32
	// conns holds the gRPC client connections to the peer; Serve dials streamNum of them.
	conns []*grpc.ClientConn
	// puters are the batch heartbeat senders. On the server side one stream is
	// handled by one goroutine, so to speed up delivery from follower to leader
	// several Putter clients are used, each creating its own stream.
	puters []*beatSender
	// Cache is the heartbeat record storage; Serve builds it on top of
	// GetFunc, PutFunc and DelFunc.
	Cache BeatRecordCache
	// cancel cancels the context that Serve uses for dialing and for the streams.
	cancel context.CancelFunc
	// conf is the configuration stored by Initialize.
	conf Config
	// closed is switched from 0 to 1 by Close; it is accessed atomically.
	closed int32
}
   147  
// Initialize stores conf on the peer; connections are only established by Serve.
func (p *RemotePeer) Initialize(conf Config) {
	p.conf = conf
}
   151  
// isClose reports whether Close has already flipped the closed flag to 1.
func (p *RemotePeer) isClose() bool {
	return atomic.LoadInt32(&p.closed) == 1
}
   155  
   156  func (p *RemotePeer) Serve(_ context.Context, checker *LeaderHealthChecker,
   157  	listenIP string, listenPort uint32) error {
   158  	ctx, cancel := context.WithCancel(context.Background())
   159  	p.cancel = cancel
   160  	p.host = listenIP
   161  	p.port = listenPort
   162  	p.conns = make([]*grpc.ClientConn, 0, streamNum)
   163  	p.puters = make([]*beatSender, 0, streamNum)
   164  	for i := 0; i < streamNum; i++ {
   165  		conn, err := grpc.DialContext(ctx, fmt.Sprintf("%s:%d", listenIP, listenPort),
   166  			grpc.WithBlock(),
   167  			grpc.WithInsecure(),
   168  		)
   169  		if err != nil {
   170  			_ = p.Close()
   171  			return err
   172  		}
   173  		p.conns = append(p.conns, conn)
   174  	}
   175  	for i := 0; i < streamNum; i++ {
   176  		client := apiservice.NewPolarisHeartbeatGRPCClient(p.conns[i])
   177  		puter, err := client.BatchHeartbeat(ctx, grpc.Header(&metadata.MD{
   178  			sendResource: []string{utils.LocalHost},
   179  		}))
   180  		if err != nil {
   181  			_ = p.Close()
   182  			return err
   183  		}
   184  		p.puters = append(p.puters, newBeatSender(ctx, p, puter))
   185  	}
   186  	p.Cache = newRemoteBeatRecordCache(p.GetFunc, p.PutFunc, p.DelFunc)
   187  	return nil
   188  }
   189  
// Host returns the peer host recorded by Serve (empty before Serve is called).
func (p *RemotePeer) Host() string {
	return p.host
}
   193  
   194  // Get get records
   195  func (p *RemotePeer) Get(key string) (*ReadBeatRecord, error) {
   196  	ret := p.Cache.Get(key)
   197  	return ret[key], nil
   198  }
   199  
// Put hands record to the remote-backed cache. The returned error is always nil.
func (p *RemotePeer) Put(record WriteBeatRecord) error {
	p.Cache.Put(record)
	return nil
}
   205  
// Del asks the remote-backed cache to remove key. The returned error is always nil.
func (p *RemotePeer) Del(key string) error {
	p.Cache.Del(key)
	return nil
}
   211  
   212  func (p *RemotePeer) GetFunc(req *apiservice.GetHeartbeatsRequest) *apiservice.GetHeartbeatsResponse {
   213  	start := time.Now()
   214  	code := "0"
   215  	defer func() {
   216  		observer := beatRecordCost.With(map[string]string{
   217  			labelAction: "GET",
   218  			labelCode:   code,
   219  		})
   220  		observer.Observe(float64(time.Since(start).Milliseconds()))
   221  	}()
   222  	client := p.choseOneClient()
   223  	resp, err := client.BatchGetHeartbeat(context.Background(), req, grpc.Header(&metadata.MD{
   224  		sendResource: []string{utils.LocalHost},
   225  	}))
   226  	if err != nil {
   227  		code = "-1"
   228  		plog.Error("[HealthCheck][Leader] send get record request", zap.String("host", p.Host()),
   229  			zap.Uint32("port", p.port), zap.Error(err))
   230  		return &apiservice.GetHeartbeatsResponse{}
   231  	}
   232  	return resp
   233  }
   234  
   235  func (p *RemotePeer) PutFunc(req *apiservice.HeartbeatsRequest) {
   236  	start := time.Now()
   237  	code := "0"
   238  	defer func() {
   239  		observer := beatRecordCost.With(map[string]string{
   240  			labelAction: "PUT",
   241  			labelCode:   code,
   242  		})
   243  		observer.Observe(float64(time.Since(start).Milliseconds()))
   244  	}()
   245  	index := rand.Intn(len(p.puters))
   246  	if err := p.puters[index].Send(req); err != nil {
   247  		code = "-1"
   248  		plog.Error("[HealthCheck][Leader] send put record request", zap.String("host", p.Host()),
   249  			zap.Uint32("port", p.port), zap.Error(err))
   250  	}
   251  }
   252  
   253  func (p *RemotePeer) DelFunc(req *apiservice.DelHeartbeatsRequest) {
   254  	start := time.Now()
   255  	code := "0"
   256  	defer func() {
   257  		observer := beatRecordCost.With(map[string]string{
   258  			labelAction: "DEL",
   259  			labelCode:   code,
   260  		})
   261  		observer.Observe(float64(time.Since(start).Milliseconds()))
   262  	}()
   263  	client := p.choseOneClient()
   264  	if _, err := client.BatchDelHeartbeat(context.Background(), req, grpc.Header(&metadata.MD{
   265  		sendResource: []string{utils.LocalHost},
   266  	})); err != nil {
   267  		code = "-1"
   268  		plog.Error("send del record request", zap.String("host", p.Host()),
   269  			zap.Uint32("port", p.port), zap.Error(err))
   270  	}
   271  }
   272  
   273  func (p *RemotePeer) choseOneClient() apiservice.PolarisHeartbeatGRPCClient {
   274  	index := rand.Intn(len(p.conns))
   275  	return apiservice.NewPolarisHeartbeatGRPCClient(p.conns[index])
   276  }
   277  
// Storage returns the cache backing this peer (nil until Serve has succeeded).
func (p *RemotePeer) Storage() BeatRecordCache {
	return p.Cache
}
   281  
   282  // Close close peer life
   283  func (p *RemotePeer) Close() error {
   284  	if !atomic.CompareAndSwapInt32(&p.closed, 0, 1) {
   285  		return nil
   286  	}
   287  	if p.cancel != nil {
   288  		p.cancel()
   289  	}
   290  	if len(p.puters) != 0 {
   291  		for i := range p.puters {
   292  			_ = p.puters[i].close()
   293  		}
   294  	}
   295  	if len(p.conns) != 0 {
   296  		for i := range p.conns {
   297  			_ = p.conns[i].Close()
   298  		}
   299  	}
   300  	return nil
   301  }
   302  
   303  var (
   304  	ErrorRecordNotFound = errors.New("beat record not found")
   305  	ErrorPeerClosed     = errors.New("peer alrady closed")
   306  )
   307  
// PeerWriteTask bundles a batch of beat records with the remote peer they are
// destined for.
type PeerWriteTask struct {
	// Peer is the remote peer associated with the task.
	Peer    *RemotePeer
	// Records are the beat records carried by the task.
	Records []WriteBeatRecord
}
   313  
// PeerReadTask bundles a batch of record keys with the remote peer they are read
// from and the futures attached to those keys.
type PeerReadTask struct {
	// Peer is the remote peer associated with the task.
	Peer    *RemotePeer
	// Keys are the record keys carried by the task.
	Keys    []string
	// Futures maps a string key to its batchjob futures.
	// NOTE(review): presumably keyed by record key — confirm against the code that fills it.
	Futures map[string][]batchjob.Future
}
   320  
// beatSender wraps one BatchHeartbeat client stream and serializes the writes
// made through Send.
type beatSender struct {
	// lock is held for the duration of every Send on the stream.
	lock   sync.RWMutex
	// sender is the underlying BatchHeartbeat client stream.
	sender apiservice.PolarisHeartbeatGRPC_BatchHeartbeatClient
}
   325  
   326  func newBeatSender(ctx context.Context, p *RemotePeer, sender apiservice.PolarisHeartbeatGRPC_BatchHeartbeatClient) *beatSender {
   327  	go func(ctx context.Context) {
   328  		for {
   329  			select {
   330  			case <-ctx.Done():
   331  				plog.Info("[HealthCheck][Leader] cancel receive put record result", zap.String("host", p.Host()),
   332  					zap.Uint32("port", p.port))
   333  				return
   334  			default:
   335  				if _, err := sender.Recv(); err != nil {
   336  					plog.Error("[HealthCheck][Leader] receive put record result", zap.String("host", p.Host()),
   337  						zap.Uint32("port", p.port), zap.Error(err))
   338  				}
   339  			}
   340  		}
   341  	}(ctx)
   342  
   343  	return &beatSender{
   344  		sender: sender,
   345  	}
   346  }
   347  
   348  func (s *beatSender) Send(req *apiservice.HeartbeatsRequest) error {
   349  	s.lock.Lock()
   350  	defer s.lock.Unlock()
   351  	return s.sender.Send(req)
   352  }
   353  
   354  func (s *beatSender) close() error {
   355  	return s.sender.CloseSend()
   356  }