github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/servers/manager.go

// Package servers provides an interface for choosing Servers to communicate
// with from a Nomad Client perspective. The package does not provide any API
// guarantees and should be called only by `hashicorp/nomad`.
package servers

import (
	"math/rand"
	"net"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/consul/lib"
	hclog "github.com/hashicorp/go-hclog"
)

const (
	// clientRPCMinReuseDuration controls the minimum amount of time RPC
	// queries are sent over an established connection to a single server
	clientRPCMinReuseDuration = 5 * time.Minute

	// Limit the number of new connections a server receives per second
	// for connection rebalancing. This limit caps the load caused by
	// continual rebalancing efforts when a cluster is in equilibrium. A
	// lower value comes at the cost of increased recovery time after a
	// partition. This parameter begins to take effect when there are
	// more than ~48K clients querying 5x servers or at lower server
	// values when there is a partition.
	//
	// For example, in a 100K Nomad cluster with 5x servers, it will
	// take ~5min for all servers to rebalance their connections. If
	// 99,995 agents are in the minority talking to only one server, it
	// will take ~26min for all servers to rebalance. A 10K cluster in
	// the same scenario will take ~2.6min to rebalance.
	newRebalanceConnsPerSecPerServer = 64
)

// Pinger is an interface for pinging a server to see if it is healthy.
type Pinger interface {
	Ping(addr net.Addr) error
}

// Server contains the address of a server and metadata that can be used for
// choosing a server to contact.
type Server struct {
	// Addr is the resolved address of the server
	Addr net.Addr
	addr string
	sync.Mutex
}

func (s *Server) Copy() *Server {
	s.Lock()
	defer s.Unlock()

	return &Server{
		Addr: s.Addr,
		addr: s.addr,
	}
}

func (s *Server) String() string {
	s.Lock()
	defer s.Unlock()

	if s.addr == "" {
		s.addr = s.Addr.String()
	}

	return s.addr
}

func (s *Server) Equal(o *Server) bool {
	if s == nil && o == nil {
		return true
	} else if s == nil && o != nil || s != nil && o == nil {
		return false
	}

	return s.Addr.String() == o.Addr.String()
}

type Servers []*Server

func (s Servers) String() string {
	addrs := make([]string, 0, len(s))
	for _, srv := range s {
		addrs = append(addrs, srv.String())
	}
	return strings.Join(addrs, ",")
}

// cycle cycles a list of servers in-place
func (s Servers) cycle() {
	numServers := len(s)
	if numServers < 2 {
		return // No action required
	}

	start := s[0]
	for i := 1; i < numServers; i++ {
		s[i-1] = s[i]
	}
	s[numServers-1] = start
}

// shuffle shuffles the server list in place
func (s Servers) shuffle() {
	for i := len(s) - 1; i > 0; i-- {
		j := rand.Int31n(int32(i + 1))
		s[i], s[j] = s[j], s[i]
	}
}

func (s Servers) Sort() {
	sort.Slice(s, func(i, j int) bool {
		return s[i].String() < s[j].String()
	})
}
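// exampleServersFromAddrs is an illustrative sketch and is not part of the
// upstream package: it shows one way a caller might turn a slice of
// "host:port" strings into the Servers value that SetServers expects. The
// function name and the use of net.ResolveTCPAddr are assumptions made for
// this example; the real Nomad client builds its server list elsewhere.
func exampleServersFromAddrs(addrs []string) (Servers, error) {
	servers := make(Servers, 0, len(addrs))
	for _, a := range addrs {
		// *net.TCPAddr satisfies the net.Addr interface stored in
		// Server.Addr.
		tcpAddr, err := net.ResolveTCPAddr("tcp", a)
		if err != nil {
			return nil, err
		}
		servers = append(servers, &Server{Addr: tcpAddr})
	}
	return servers, nil
}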
// Equal returns true if the two server lists are equal, including the ordering.
func (s Servers) Equal(o Servers) bool {
	if len(s) != len(o) {
		return false
	}

	for i, v := range s {
		if !v.Equal(o[i]) {
			return false
		}
	}

	return true
}

// Manager maintains the list of known Nomad servers and selects which one
// the client should contact.
type Manager struct {
	// servers is the list of all known Nomad servers.
	servers Servers

	// rebalanceTimer controls the duration of the rebalance interval
	rebalanceTimer *time.Timer

	// shutdownCh is a copy of the channel in Nomad.Client
	shutdownCh chan struct{}

	// numNodes is used to estimate the approximate number of nodes in
	// a cluster and limit the rate at which it rebalances server
	// connections. This should be read and set under the Manager's lock.
	numNodes int32

	// connPoolPinger is used to test the health of a server in the connection
	// pool. Pinger is an interface that wraps client.ConnPool.
	connPoolPinger Pinger

	logger hclog.Logger

	sync.Mutex
}

// New is the only way to safely create a new Manager struct.
func New(logger hclog.Logger, shutdownCh chan struct{}, connPoolPinger Pinger) (m *Manager) {
	logger = logger.Named("server_mgr")
	return &Manager{
		logger:         logger,
		connPoolPinger: connPoolPinger,
		rebalanceTimer: time.NewTimer(clientRPCMinReuseDuration),
		shutdownCh:     shutdownCh,
	}
}

// Start is used to start and manage the task of automatically shuffling and
// rebalancing the list of Nomad servers in order to distribute load across
// all known and available Nomad servers.
func (m *Manager) Start() {
	for {
		select {
		case <-m.rebalanceTimer.C:
			m.RebalanceServers()
			m.refreshServerRebalanceTimer()

		case <-m.shutdownCh:
			m.logger.Debug("shutting down")
			return
		}
	}
}

// SetServers sets the servers and returns true if the new server list is
// different from the existing server set.
func (m *Manager) SetServers(servers Servers) bool {
	m.Lock()
	defer m.Unlock()

	// Determine if they are equal
	equal := m.serversAreEqual(servers)

	// If the server list is equal, don't change the list and return
	// immediately. This prevents unnecessary shuffling of a failed server
	// that was moved to the bottom of the list.
	if equal {
		return !equal
	}

	m.logger.Debug("new server list", "new_servers", servers, "old_servers", m.servers)

	// Randomize the incoming servers
	servers.shuffle()
	m.servers = servers

	return !equal
}

// serversAreEqual returns true if the argument list of servers is equal to
// the list we already have.
func (m *Manager) serversAreEqual(servers Servers) bool {
	// We use a copy of the server list here because determining
	// equality requires a sort step which modifies the order of the
	// server list.
	var copy Servers
	copy = make([]*Server, 0, len(m.servers))
	for _, s := range m.servers {
		copy = append(copy, s.Copy())
	}

	// Sort both the existing and incoming servers.
	copy.Sort()
	servers.Sort()

	return copy.Equal(servers)
}
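// exampleManagerLifecycle is an illustrative sketch and is not part of the
// upstream package: based only on the API in this file, it shows how a
// client is expected to wire the Manager together - create it with New, seed
// it with an initial server list via SetServers, and run the rebalance loop
// in a background goroutine until shutdownCh is closed. The function name is
// an assumption made for this example.
func exampleManagerLifecycle(logger hclog.Logger, pinger Pinger, initial Servers) (*Manager, chan struct{}) {
	shutdownCh := make(chan struct{})
	m := New(logger, shutdownCh, pinger)

	// SetServers shuffles and stores the list only if it differs from the
	// list the Manager already holds.
	m.SetServers(initial)

	// Start blocks until shutdownCh is closed, so it runs in its own
	// goroutine; closing the channel stops the rebalance loop.
	go m.Start()

	return m, shutdownCh
}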
// FindServer returns a server to send an RPC to. If there are no servers,
// nil is returned.
func (m *Manager) FindServer() *Server {
	m.Lock()
	defer m.Unlock()

	if len(m.servers) == 0 {
		m.logger.Warn("no servers available")
		return nil
	}

	// Return whatever is at the front of the list because it is
	// assumed to be the oldest in the server list (unless -
	// hypothetically - the server list was rotated right after a
	// server was added).
	return m.servers[0]
}

// NumNodes returns the approximate number of nodes in the cluster.
func (m *Manager) NumNodes() int32 {
	m.Lock()
	defer m.Unlock()
	return m.numNodes
}

// SetNumNodes stores the approximate number of nodes in the cluster.
func (m *Manager) SetNumNodes(n int32) {
	m.Lock()
	defer m.Unlock()
	m.numNodes = n
}

// NotifyFailedServer marks the passed in server as "failed" by rotating it
// to the end of the server list.
func (m *Manager) NotifyFailedServer(s *Server) {
	m.Lock()
	defer m.Unlock()

	// If the server being failed is not the first server on the list,
	// this is a noop. If, however, the server is failed and first on
	// the list, move the server to the end of the list.
	if len(m.servers) > 1 && m.servers[0].Equal(s) {
		m.servers.cycle()
	}
}

// NumServers returns the total number of known servers, whether healthy or not.
func (m *Manager) NumServers() int {
	m.Lock()
	defer m.Unlock()
	return len(m.servers)
}

// GetServers returns a copy of the current list of servers.
func (m *Manager) GetServers() Servers {
	m.Lock()
	defer m.Unlock()

	copy := make([]*Server, 0, len(m.servers))
	for _, s := range m.servers {
		copy = append(copy, s.Copy())
	}

	return copy
}

// RebalanceServers shuffles the order in which Servers will be contacted. The
// function will shuffle the set of potential servers to contact and then attempt
// to contact each server. If a server successfully responds it is used, otherwise
// it is rotated such that it will be the last attempted server.
func (m *Manager) RebalanceServers() {
	// Shuffle servers so we have a chance of picking a new one.
	servers := m.GetServers()
	servers.shuffle()

	// Iterate through the shuffled server list to find an assumed
	// healthy server. NOTE: Do not iterate on the list directly because
	// this loop mutates the server list in-place.
	var foundHealthyServer bool
	for i := 0; i < len(servers); i++ {
		// Always test the first server. Failed servers are cycled
		// while Serf detects the node has failed.
		srv := servers[0]

		err := m.connPoolPinger.Ping(srv.Addr)
		if err == nil {
			foundHealthyServer = true
			break
		}
		m.logger.Debug("error pinging server", "error", err, "server", srv)

		servers.cycle()
	}

	if !foundHealthyServer {
		m.logger.Debug("no healthy servers during rebalance")
		return
	}

	// Save the servers
	m.Lock()
	m.servers = servers
	m.Unlock()
}
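// exampleRPCWithFailover is an illustrative sketch and is not part of the
// upstream package: it shows the intended pairing of FindServer and
// NotifyFailedServer - always use the server at the front of the list, and
// on failure rotate it to the back so the next attempt targets a different
// server. The function name and the call parameter are assumptions made for
// this example.
func exampleRPCWithFailover(m *Manager, call func(net.Addr) error) bool {
	// Bound the attempts by the number of known servers so each server is
	// tried at most once per invocation.
	for i := 0; i < m.NumServers(); i++ {
		srv := m.FindServer()
		if srv == nil {
			// No servers are known at all.
			return false
		}

		if err := call(srv.Addr); err != nil {
			// Rotate the failed server to the end of the list and
			// retry with whichever server is now at the front.
			m.NotifyFailedServer(srv)
			continue
		}
		return true
	}
	return false
}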
// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	m.Lock()
	defer m.Unlock()
	numServers := len(m.servers)

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// clientRPCMinReuseDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across the cluster's nodes.
	clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)

	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, clientRPCMinReuseDuration, int(m.numNodes))
	connRebalanceTimeout += lib.RandomStagger(connRebalanceTimeout)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}

// ResetRebalanceTimer resets the rebalance timer. This method exists for
// testing and should not be used directly.
func (m *Manager) ResetRebalanceTimer() {
	m.Lock()
	defer m.Unlock()
	m.rebalanceTimer.Reset(clientRPCMinReuseDuration)
}
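// exampleRebalanceInterval is an illustrative sketch and is not part of the
// upstream package: it isolates the timer math used by
// refreshServerRebalanceTimer, minus the random stagger. Assuming
// lib.RateScaledInterval scales roughly linearly once the node count is
// large enough to exceed the clientRPCMinReuseDuration floor, a 100,000-node
// cluster with 5 servers yields a cluster-wide budget of 320 conns/s and a
// base interval of roughly 100000/320 ≈ 312s, consistent with the ~5min
// figure in the comment on newRebalanceConnsPerSecPerServer above.
func exampleRebalanceInterval(numServers, numNodes int) time.Duration {
	clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)
	return lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, clientRPCMinReuseDuration, numNodes)
}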