github.com/matrixorigin/matrixone@v1.2.0/pkg/gossip/node.go (about) 1 // Copyright 2021 - 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gossip 16 17 import ( 18 "context" 19 pb "github.com/matrixorigin/matrixone/pkg/pb/statsinfo" 20 "io" 21 "net" 22 "strconv" 23 "sync/atomic" 24 "time" 25 26 "github.com/hashicorp/memberlist" 27 "github.com/matrixorigin/matrixone/pkg/common/moerr" 28 "github.com/matrixorigin/matrixone/pkg/common/runtime" 29 "github.com/matrixorigin/matrixone/pkg/fileservice" 30 "github.com/matrixorigin/matrixone/pkg/pb/query" 31 "github.com/matrixorigin/matrixone/pkg/queryservice/client" 32 "go.uber.org/zap" 33 ) 34 35 const ( 36 defaultGossipNodes = 6 37 // We do not need to exchange the entire data of node. 38 defaultPushPullInterval = 0 39 defaultGossipInterval = time.Second 40 defaultUDPBufferSize = 4 * 1024 * 1024 // 4MB 41 defaultHandoffQueueDepth = 4096 42 ) 43 44 type Node struct { 45 ctx context.Context 46 // if the gossip node is created, set it to true. 47 created bool 48 nid string 49 logger *zap.Logger 50 list *memberlist.Memberlist 51 delegate *delegate 52 joined atomic.Bool 53 listenAddrFn func() string 54 serviceAddrFn func() string 55 cacheServerAddrFn func() string 56 } 57 58 func NewNode(ctx context.Context, nid string, opts ...Option) (*Node, error) { 59 rt := runtime.ProcessLevelRuntime() 60 if rt == nil { 61 rt = runtime.DefaultRuntime() 62 } 63 logger := rt.Logger().Named("gossip") 64 n := &Node{ctx: ctx, nid: nid, logger: logger.RawLogger()} 65 for _, opt := range opts { 66 opt(n) 67 } 68 return n, nil 69 } 70 71 func (n *Node) Create() error { 72 if n.cacheServerAddrFn == nil { 73 return moerr.NewInternalErrorNoCtx("cache service address not set") 74 } 75 n.delegate = newDelegate(n.logger, n.cacheServerAddrFn()) 76 cfg := memberlist.DefaultWANConfig() 77 cfg.Delegate = n.delegate 78 cfg.Events = n.delegate 79 cfg.Name = n.nid 80 cfg.PushPullInterval = defaultPushPullInterval 81 cfg.GossipInterval = defaultGossipInterval 82 cfg.GossipNodes = defaultGossipNodes 83 cfg.UDPBufferSize = defaultUDPBufferSize 84 cfg.HandoffQueueDepth = defaultHandoffQueueDepth 85 // Discard the gossip logs. 86 cfg.Logger = nil 87 cfg.LogOutput = io.Discard 88 89 listenAddr := n.listenAddrFn() 90 if len(listenAddr) == 0 { 91 n.logger.Error("cannot create gossip node, because listen address is empty") 92 return nil 93 } 94 95 serviceAddr := n.serviceAddrFn() 96 if len(serviceAddr) == 0 { 97 n.logger.Error("cannot create gossip node, because service address is empty") 98 return nil 99 } 100 101 // Set address that gossip uses. 102 bindAddr, bindPort, err := parseAddress(n.listenAddrFn()) 103 if err != nil { 104 return err 105 } 106 cfg.BindAddr = bindAddr 107 cfg.BindPort = bindPort 108 109 aAddr, aPort, err := parseAddress(serviceAddr) 110 if err != nil { 111 return err 112 } 113 cfg.AdvertiseAddr = aAddr 114 cfg.AdvertisePort = aPort 115 116 ml, err := memberlist.Create(cfg) 117 if err != nil { 118 return moerr.NewInternalError(n.ctx, "CN gossip create node failed: %s", err) 119 } 120 n.list = ml 121 n.created = true 122 return nil 123 } 124 125 func (n *Node) Join(existing []string) error { 126 if !n.created { 127 return moerr.NewInternalErrorNoCtx("cannot join gossip cluster, because node has not been created") 128 } 129 m, err := n.list.Join(existing) 130 if err != nil { 131 n.logger.Error("node failed to join cluster", 132 zap.String("node ID", n.nid), zap.Error(err)) 133 return err 134 } 135 n.logger.Info("node join cluster successfully", 136 zap.String("node ID", n.nid), 137 zap.Int("joined nodes", m)) 138 return nil 139 } 140 141 func (n *Node) Created() bool { 142 return n.created 143 } 144 145 func (n *Node) SetJoined() { 146 n.joined.Store(true) 147 } 148 149 func (n *Node) UnsetJoined() { 150 n.joined.Store(false) 151 } 152 153 func (n *Node) Joined() bool { 154 return n.joined.Load() 155 } 156 157 func (n *Node) Leave(timeout time.Duration) error { 158 if !n.created { 159 return nil 160 } 161 n.logger.Info("leaving gossip cluster", 162 zap.String("node ID", n.nid)) 163 if err := n.list.Leave(timeout); err != nil { 164 n.logger.Error("failed to leave gossip cluster", 165 zap.String("node ID", n.nid), 166 zap.Error(err)) 167 } 168 if err := n.list.Shutdown(); err != nil { 169 n.logger.Error("failed to shutdown gossip node", 170 zap.String("node ID", n.nid), 171 zap.Error(err)) 172 return err 173 } 174 return nil 175 } 176 177 func (n *Node) DistKeyCacheGetter() fileservice.KeyRouterFactory[query.CacheKey] { 178 return func() client.KeyRouter[query.CacheKey] { 179 if n.delegate != nil { 180 return n.delegate.getDataCacheKey() 181 } 182 return nil 183 } 184 } 185 186 func (n *Node) StatsKeyRouter() client.KeyRouter[pb.StatsInfoKey] { 187 if n != nil && n.delegate != nil { 188 return n.delegate.statsInfoKey 189 } 190 return nil 191 } 192 193 func (n *Node) NumMembers() int { 194 return n.list.NumMembers() 195 } 196 197 func parseAddress(addr string) (string, int, error) { 198 host, sp, err := net.SplitHostPort(addr) 199 if err != nil { 200 return "", 0, err 201 } 202 port, err := strconv.ParseUint(sp, 10, 16) 203 if err != nil { 204 return "", 0, err 205 } 206 return host, int(port), nil 207 }