github.com/smithx10/nomad@v0.9.1-rc1/client/servers/manager.go

// Package servers provides an interface for choosing Servers to communicate
// with from a Nomad Client perspective. The package does not provide any API
// guarantees and should be called only by `hashicorp/nomad`.
package servers

import (
	"math/rand"
	"net"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/consul/lib"
	hclog "github.com/hashicorp/go-hclog"
)

const (
	// clientRPCMinReuseDuration controls the minimum amount of time RPC
	// queries are sent over an established connection to a single server
	clientRPCMinReuseDuration = 5 * time.Minute

	// Limit the number of new connections a server receives per second
	// for connection rebalancing. This limit caps the load caused by
	// continual rebalancing efforts when a cluster is in equilibrium. A
	// lower value comes at the cost of increased recovery time after a
	// partition. This parameter begins to take effect when there are
	// more than ~48K clients querying 5x servers or at lower server
	// values when there is a partition.
	//
	// For example, in a 100K Nomad cluster with 5x servers, it will
	// take ~5min for all servers to rebalance their connections. If
	// 99,995 agents are in the minority talking to only one server, it
	// will take ~26min for all servers to rebalance. A 10K cluster in
	// the same scenario will take ~2.6min to rebalance.
	newRebalanceConnsPerSecPerServer = 64
)

// Pinger is an interface for pinging a server to see if it is healthy.
type Pinger interface {
	Ping(addr net.Addr) error
}
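// Note: the following noopPinger is an illustrative sketch only and is not
// part of the original package. It shows the minimal shape of a Pinger
// implementation; the production Pinger wraps the Nomad client's connection
// pool (client.ConnPool) and issues a real RPC to the address.
type noopPinger struct{}

// Ping treats every server as healthy. A real implementation would return an
// error when the server at addr cannot be reached.
func (noopPinger) Ping(_ net.Addr) error { return nil }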
// Server contains the address of a server and metadata that can be used for
// choosing a server to contact.
type Server struct {
	// Addr is the resolved address of the server
	Addr net.Addr
	addr string
	sync.Mutex

	// DC is the datacenter of the server
	DC string
}

func (s *Server) Copy() *Server {
	s.Lock()
	defer s.Unlock()

	return &Server{
		Addr: s.Addr,
		addr: s.addr,
		DC:   s.DC,
	}
}

func (s *Server) String() string {
	s.Lock()
	defer s.Unlock()

	if s.addr == "" {
		s.addr = s.Addr.String()
	}

	return s.addr
}

func (s *Server) Equal(o *Server) bool {
	if s == nil && o == nil {
		return true
	} else if s == nil && o != nil || s != nil && o == nil {
		return false
	}

	return s.Addr.String() == o.Addr.String() && s.DC == o.DC
}

type Servers []*Server

func (s Servers) String() string {
	addrs := make([]string, 0, len(s))
	for _, srv := range s {
		addrs = append(addrs, srv.String())
	}
	return strings.Join(addrs, ",")
}

// cycle cycles a list of servers in-place
func (s Servers) cycle() {
	numServers := len(s)
	if numServers < 2 {
		return // No action required
	}

	start := s[0]
	for i := 1; i < numServers; i++ {
		s[i-1] = s[i]
	}
	s[numServers-1] = start
}

// shuffle shuffles the server list in place
func (s Servers) shuffle() {
	for i := len(s) - 1; i > 0; i-- {
		j := rand.Int31n(int32(i + 1))
		s[i], s[j] = s[j], s[i]
	}
}

func (s Servers) Sort() {
	sort.Slice(s, func(i, j int) bool {
		a, b := s[i], s[j]
		if addr1, addr2 := a.Addr.String(), b.Addr.String(); addr1 == addr2 {
			return a.DC < b.DC
		} else {
			return addr1 < addr2
		}
	})
}

// Equal reports whether the two server lists are equal, including the ordering.
func (s Servers) Equal(o Servers) bool {
	if len(s) != len(o) {
		return false
	}

	for i, v := range s {
		if !v.Equal(o[i]) {
			return false
		}
	}

	return true
}

type Manager struct {
	// servers is the list of all known Nomad servers.
	servers Servers

	// rebalanceTimer controls the duration of the rebalance interval
	rebalanceTimer *time.Timer

	// shutdownCh is a copy of the channel in Nomad.Client
	shutdownCh chan struct{}

	// numNodes is used to estimate the approximate number of nodes in
	// a cluster and limit the rate at which it rebalances server
	// connections. Access is guarded by the Manager's lock.
	numNodes int32

	// connPoolPinger is used to test the health of a server in the connection
	// pool. Pinger is an interface that wraps client.ConnPool.
	connPoolPinger Pinger

	logger hclog.Logger

	sync.Mutex
}

// New is the only way to safely create a new Manager struct.
func New(logger hclog.Logger, shutdownCh chan struct{}, connPoolPinger Pinger) *Manager {
	logger = logger.Named("server_mgr")
	return &Manager{
		logger:         logger,
		connPoolPinger: connPoolPinger,
		rebalanceTimer: time.NewTimer(clientRPCMinReuseDuration),
		shutdownCh:     shutdownCh,
	}
}

// Start is used to start and manage the task of automatically shuffling and
// rebalancing the list of Nomad servers in order to distribute load across
// all known and available Nomad servers.
func (m *Manager) Start() {
	for {
		select {
		case <-m.rebalanceTimer.C:
			m.RebalanceServers()
			m.refreshServerRebalanceTimer()

		case <-m.shutdownCh:
			m.logger.Debug("shutting down")
			return
		}
	}
}

// SetServers sets the servers and reports whether the new server list is
// different from the existing server set.
func (m *Manager) SetServers(servers Servers) bool {
	m.Lock()
	defer m.Unlock()

	// Sort both the existing and incoming servers
	servers.Sort()
	m.servers.Sort()

	// Determine if they are equal
	equal := servers.Equal(m.servers)

	// Randomize the incoming servers
	servers.shuffle()
	m.servers = servers

	return !equal
}

// FindServer returns a server to send an RPC to. If there are no servers,
// nil is returned.
func (m *Manager) FindServer() *Server {
	m.Lock()
	defer m.Unlock()

	if len(m.servers) == 0 {
		m.logger.Warn("no servers available")
		return nil
	}

	// Return whatever is at the front of the list because it is
	// assumed to be the oldest in the server list (unless, hypothetically,
	// the server list was rotated right after a server was added).
	return m.servers[0]
}

// NumNodes returns the approximate number of nodes in the cluster.
func (m *Manager) NumNodes() int32 {
	m.Lock()
	defer m.Unlock()
	return m.numNodes
}

// SetNumNodes stores the approximate number of nodes in the cluster.
func (m *Manager) SetNumNodes(n int32) {
	m.Lock()
	defer m.Unlock()
	m.numNodes = n
}
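// As a rough usage sketch (illustrative only; the pinger variable, logger,
// and address below are placeholders rather than values this package
// provides), the Nomad client is expected to construct a Manager, hand it
// the known servers, and run the rebalance loop in the background:
//
//	shutdownCh := make(chan struct{})
//	mgr := New(hclog.Default(), shutdownCh, pinger) // pinger implements Pinger
//	go mgr.Start()
//
//	addr, _ := net.ResolveTCPAddr("tcp", "10.0.0.1:4647")
//	mgr.SetServers(Servers{{Addr: addr}})
//
//	if srv := mgr.FindServer(); srv != nil {
//		// ... issue the RPC against srv.Addr; on failure:
//		mgr.NotifyFailedServer(srv)
//	}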
// NotifyFailedServer marks the passed in server as "failed" by rotating it
// to the end of the server list.
func (m *Manager) NotifyFailedServer(s *Server) {
	m.Lock()
	defer m.Unlock()

	// If the server being failed is not the first server on the list,
	// this is a noop. If, however, the server is failed and first on
	// the list, move the server to the end of the list.
	if len(m.servers) > 1 && m.servers[0].Equal(s) {
		m.servers.cycle()
	}
}

// NumServers returns the total number of known servers whether healthy or not.
func (m *Manager) NumServers() int {
	m.Lock()
	defer m.Unlock()
	return len(m.servers)
}

// GetServers returns a copy of the current list of servers.
func (m *Manager) GetServers() Servers {
	m.Lock()
	defer m.Unlock()

	copied := make([]*Server, 0, len(m.servers))
	for _, s := range m.servers {
		copied = append(copied, s.Copy())
	}

	return copied
}

// RebalanceServers shuffles the order in which Servers will be contacted. The
// function will shuffle the set of potential servers to contact and then attempt
// to contact each server. If a server successfully responds it is used, otherwise
// it is rotated such that it will be the last attempted server.
func (m *Manager) RebalanceServers() {
	// Shuffle servers so we have a chance of picking a new one.
	servers := m.GetServers()
	servers.shuffle()

	// Iterate through the shuffled server list to find an assumed
	// healthy server. NOTE: Do not iterate on the list directly because
	// this loop mutates the server list in-place.
	var foundHealthyServer bool
	for i := 0; i < len(servers); i++ {
		// Always test the first server. Failed servers are cycled
		// while Serf detects the node has failed.
		srv := servers[0]

		err := m.connPoolPinger.Ping(srv.Addr)
		if err == nil {
			foundHealthyServer = true
			break
		}
		m.logger.Debug("error pinging server", "error", err, "server", srv)

		servers.cycle()
	}

	if !foundHealthyServer {
		m.logger.Debug("no healthy servers during rebalance")
		return
	}

	// Save the servers
	m.Lock()
	m.servers = servers
	m.Unlock()
}

// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	m.Lock()
	defer m.Unlock()
	numServers := len(m.servers)

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// clientRPCMinReuseDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across the cluster's
	// client nodes.
	clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)

	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, clientRPCMinReuseDuration, int(m.numNodes))
	connRebalanceTimeout += lib.RandomStagger(connRebalanceTimeout)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}

// ResetRebalanceTimer resets the rebalance timer. This method exists for
// testing and should not be used directly.
func (m *Manager) ResetRebalanceTimer() {
	m.Lock()
	defer m.Unlock()
	m.rebalanceTimer.Reset(clientRPCMinReuseDuration)
}
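// As a rough worked example of the rebalance cadence (assuming consul's
// lib.RateScaledInterval scales the interval as numNodes/rate seconds with
// clientRPCMinReuseDuration as a floor, and lib.RandomStagger adds between
// 0% and 100% jitter on top), with 5 servers:
//
//	clusterWideRebalanceConnsPerSec = 5 * 64 = 320 conns/s
//	10,000 client nodes  => 10000/320 ≈ 31s, clamped up to the 5 minute floor
//	100,000 client nodes => 100000/320 ≈ 312s (~5.2 minutes)
//
// The effective timeout is therefore between 1x and 2x the scaled interval,
// in line with the ballpark figures documented on
// newRebalanceConnsPerSecPerServer.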