github.com/matrixorigin/matrixone@v1.2.0/pkg/gossip/node.go (about)

     1  // Copyright 2021 - 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gossip
    16  
    17  import (
    18  	"context"
    19  	pb "github.com/matrixorigin/matrixone/pkg/pb/statsinfo"
    20  	"io"
    21  	"net"
    22  	"strconv"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/hashicorp/memberlist"
    27  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    28  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    29  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    30  	"github.com/matrixorigin/matrixone/pkg/pb/query"
    31  	"github.com/matrixorigin/matrixone/pkg/queryservice/client"
    32  	"go.uber.org/zap"
    33  )
    34  
    35  const (
    36  	defaultGossipNodes = 6
    37  	// We do not need to exchange the entire data of node.
    38  	defaultPushPullInterval  = 0
    39  	defaultGossipInterval    = time.Second
    40  	defaultUDPBufferSize     = 4 * 1024 * 1024 // 4MB
    41  	defaultHandoffQueueDepth = 4096
    42  )
    43  
// Node is the local member of the gossip cluster. It is built by NewNode,
// started by Create, and then joined to or removed from the cluster with
// Join and Leave.
type Node struct {
	ctx context.Context // context handed to NewNode; Create uses it when building its internal error.
	// created is set to true by Create once the memberlist has been built
	// successfully; Join and Leave check it before touching list.
	created           bool
	nid               string                 // node ID; used as the memberlist name and in log fields.
	logger            *zap.Logger            // raw logger named "gossip".
	list              *memberlist.Memberlist // memberlist instance; nil until Create succeeds.
	delegate          *delegate              // installed as both the memberlist delegate and event delegate.
	joined            atomic.Bool            // toggled by SetJoined/UnsetJoined, read by Joined.
	listenAddrFn      func() string          // yields the host:port that Create parses into the bind address.
	serviceAddrFn     func() string          // yields the host:port that Create parses into the advertise address.
	cacheServerAddrFn func() string          // yields the cache server address passed to newDelegate; required by Create.
}
    57  
    58  func NewNode(ctx context.Context, nid string, opts ...Option) (*Node, error) {
    59  	rt := runtime.ProcessLevelRuntime()
    60  	if rt == nil {
    61  		rt = runtime.DefaultRuntime()
    62  	}
    63  	logger := rt.Logger().Named("gossip")
    64  	n := &Node{ctx: ctx, nid: nid, logger: logger.RawLogger()}
    65  	for _, opt := range opts {
    66  		opt(n)
    67  	}
    68  	return n, nil
    69  }
    70  
    71  func (n *Node) Create() error {
    72  	if n.cacheServerAddrFn == nil {
    73  		return moerr.NewInternalErrorNoCtx("cache service address not set")
    74  	}
    75  	n.delegate = newDelegate(n.logger, n.cacheServerAddrFn())
    76  	cfg := memberlist.DefaultWANConfig()
    77  	cfg.Delegate = n.delegate
    78  	cfg.Events = n.delegate
    79  	cfg.Name = n.nid
    80  	cfg.PushPullInterval = defaultPushPullInterval
    81  	cfg.GossipInterval = defaultGossipInterval
    82  	cfg.GossipNodes = defaultGossipNodes
    83  	cfg.UDPBufferSize = defaultUDPBufferSize
    84  	cfg.HandoffQueueDepth = defaultHandoffQueueDepth
    85  	// Discard the gossip logs.
    86  	cfg.Logger = nil
    87  	cfg.LogOutput = io.Discard
    88  
    89  	listenAddr := n.listenAddrFn()
    90  	if len(listenAddr) == 0 {
    91  		n.logger.Error("cannot create gossip node, because listen address is empty")
    92  		return nil
    93  	}
    94  
    95  	serviceAddr := n.serviceAddrFn()
    96  	if len(serviceAddr) == 0 {
    97  		n.logger.Error("cannot create gossip node, because service address is empty")
    98  		return nil
    99  	}
   100  
   101  	// Set address that gossip uses.
   102  	bindAddr, bindPort, err := parseAddress(n.listenAddrFn())
   103  	if err != nil {
   104  		return err
   105  	}
   106  	cfg.BindAddr = bindAddr
   107  	cfg.BindPort = bindPort
   108  
   109  	aAddr, aPort, err := parseAddress(serviceAddr)
   110  	if err != nil {
   111  		return err
   112  	}
   113  	cfg.AdvertiseAddr = aAddr
   114  	cfg.AdvertisePort = aPort
   115  
   116  	ml, err := memberlist.Create(cfg)
   117  	if err != nil {
   118  		return moerr.NewInternalError(n.ctx, "CN gossip create node failed: %s", err)
   119  	}
   120  	n.list = ml
   121  	n.created = true
   122  	return nil
   123  }
   124  
   125  func (n *Node) Join(existing []string) error {
   126  	if !n.created {
   127  		return moerr.NewInternalErrorNoCtx("cannot join gossip cluster, because node has not been created")
   128  	}
   129  	m, err := n.list.Join(existing)
   130  	if err != nil {
   131  		n.logger.Error("node failed to join cluster",
   132  			zap.String("node ID", n.nid), zap.Error(err))
   133  		return err
   134  	}
   135  	n.logger.Info("node join cluster successfully",
   136  		zap.String("node ID", n.nid),
   137  		zap.Int("joined nodes", m))
   138  	return nil
   139  }
   140  
// Created reports whether Create has successfully built the underlying
// memberlist. It stays false when Create returned early, including the
// cases where Create only logged a problem and returned nil.
func (n *Node) Created() bool {
	return n.created
}
   144  
// SetJoined atomically marks the node as joined. Join does not set this
// flag itself.
func (n *Node) SetJoined() {
	n.joined.Store(true)
}
   148  
// UnsetJoined atomically clears the joined flag.
func (n *Node) UnsetJoined() {
	n.joined.Store(false)
}
   152  
// Joined atomically reads the flag maintained by SetJoined and UnsetJoined.
func (n *Node) Joined() bool {
	return n.joined.Load()
}
   156  
   157  func (n *Node) Leave(timeout time.Duration) error {
   158  	if !n.created {
   159  		return nil
   160  	}
   161  	n.logger.Info("leaving gossip cluster",
   162  		zap.String("node ID", n.nid))
   163  	if err := n.list.Leave(timeout); err != nil {
   164  		n.logger.Error("failed to leave gossip cluster",
   165  			zap.String("node ID", n.nid),
   166  			zap.Error(err))
   167  	}
   168  	if err := n.list.Shutdown(); err != nil {
   169  		n.logger.Error("failed to shutdown gossip node",
   170  			zap.String("node ID", n.nid),
   171  			zap.Error(err))
   172  		return err
   173  	}
   174  	return nil
   175  }
   176  
   177  func (n *Node) DistKeyCacheGetter() fileservice.KeyRouterFactory[query.CacheKey] {
   178  	return func() client.KeyRouter[query.CacheKey] {
   179  		if n.delegate != nil {
   180  			return n.delegate.getDataCacheKey()
   181  		}
   182  		return nil
   183  	}
   184  }
   185  
   186  func (n *Node) StatsKeyRouter() client.KeyRouter[pb.StatsInfoKey] {
   187  	if n != nil && n.delegate != nil {
   188  		return n.delegate.statsInfoKey
   189  	}
   190  	return nil
   191  }
   192  
   193  func (n *Node) NumMembers() int {
   194  	return n.list.NumMembers()
   195  }
   196  
   197  func parseAddress(addr string) (string, int, error) {
   198  	host, sp, err := net.SplitHostPort(addr)
   199  	if err != nil {
   200  		return "", 0, err
   201  	}
   202  	port, err := strconv.ParseUint(sp, 10, 16)
   203  	if err != nil {
   204  		return "", 0, err
   205  	}
   206  	return host, int(port), nil
   207  }