github.com/anuvu/nomad@v0.8.7-atom1/client/servers/manager.go

// Package servers provides an interface for choosing Servers to communicate
// with from a Nomad Client perspective. The package does not provide any API
// guarantees and should be called only by `hashicorp/nomad`.
package servers

import (
	"log"
	"math/rand"
	"net"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/consul/lib"
)

const (
	// clientRPCMinReuseDuration controls the minimum amount of time RPC
	// queries are sent over an established connection to a single server.
	clientRPCMinReuseDuration = 5 * time.Minute

	// Limit the number of new connections a server receives per second
	// for connection rebalancing. This limit caps the load caused by
	// continual rebalancing efforts when a cluster is in equilibrium. A
	// lower value comes at the cost of increased recovery time after a
	// partition. This parameter begins to take effect when there are
	// more than ~48K clients querying 5x servers or at lower server
	// counts when there is a partition.
	//
	// For example, in a 100K Nomad cluster with 5x servers, it will
	// take ~5min for all servers to rebalance their connections. If
	// 99,995 agents are in the minority talking to only one server, it
	// will take ~26min for all servers to rebalance. A 10K cluster in
	// the same scenario will take ~2.6min to rebalance.
	newRebalanceConnsPerSecPerServer = 64
)

// Pinger is an interface for pinging a server to see if it is healthy.
type Pinger interface {
	Ping(addr net.Addr) error
}

// Server contains the address of a server and metadata that can be used for
// choosing a server to contact.
type Server struct {
	// Addr is the resolved address of the server.
	Addr net.Addr
	addr string
	sync.Mutex

	// DC is the datacenter of the server.
	DC string
}

// Copy returns a copy of the server that can be used independently of the
// original.
func (s *Server) Copy() *Server {
	s.Lock()
	defer s.Unlock()

	return &Server{
		Addr: s.Addr,
		addr: s.addr,
		DC:   s.DC,
	}
}

// String returns the cached string form of the server's address.
func (s *Server) String() string {
	s.Lock()
	defer s.Unlock()

	if s.addr == "" {
		s.addr = s.Addr.String()
	}

	return s.addr
}

// Equal reports whether two servers refer to the same address and datacenter.
func (s *Server) Equal(o *Server) bool {
	if s == nil && o == nil {
		return true
	} else if s == nil && o != nil || s != nil && o == nil {
		return false
	}

	return s.Addr.String() == o.Addr.String() && s.DC == o.DC
}

// Servers is a list of servers.
type Servers []*Server

// String returns a comma-separated list of the servers' addresses.
func (s Servers) String() string {
	addrs := make([]string, 0, len(s))
	for _, srv := range s {
		addrs = append(addrs, srv.String())
	}
	return strings.Join(addrs, ",")
}

// cycle rotates the list of servers in-place, moving the first server to the
// end of the list.
func (s Servers) cycle() {
	numServers := len(s)
	if numServers < 2 {
		return // No action required
	}

	start := s[0]
	for i := 1; i < numServers; i++ {
		s[i-1] = s[i]
	}
	s[numServers-1] = start
}

// shuffle shuffles the server list in place.
func (s Servers) shuffle() {
	for i := len(s) - 1; i > 0; i-- {
		j := rand.Int31n(int32(i + 1))
		s[i], s[j] = s[j], s[i]
	}
}

// Sort orders the servers by address, then by datacenter.
func (s Servers) Sort() {
	sort.Slice(s, func(i, j int) bool {
		a, b := s[i], s[j]
		if addr1, addr2 := a.Addr.String(), b.Addr.String(); addr1 == addr2 {
			return a.DC < b.DC
		} else {
			return addr1 < addr2
		}
	})
}
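// The function below is an illustrative sketch added to this listing and is
// not part of the upstream file. It shows how the Servers helpers above
// compose: building a list from resolved addresses, sorting it, and cycling a
// presumed-failed server to the back. The addresses and the "dc1" datacenter
// name are made up for the example, and only packages already imported by
// this file are used.
func exampleServersHelpers(logger *log.Logger) {
	var list Servers
	for _, raw := range []string{"10.0.0.2:4647", "10.0.0.1:4647"} {
		addr, err := net.ResolveTCPAddr("tcp", raw)
		if err != nil {
			logger.Printf("[WARN] example: could not resolve %q: %v", raw, err)
			continue
		}
		list = append(list, &Server{Addr: addr, DC: "dc1"})
	}

	// Sort orders by address (then DC), which makes Equal comparisons
	// between two lists deterministic once both are sorted.
	list.Sort()
	logger.Printf("[DEBUG] example: sorted servers: %s", list)

	// cycle rotates the first server to the end, which is how a failed
	// server is pushed to the back of the rotation.
	list.cycle()
	logger.Printf("[DEBUG] example: after cycle: %s", list)
}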
// Equal reports whether the two server lists are equal, including ordering.
func (s Servers) Equal(o Servers) bool {
	if len(s) != len(o) {
		return false
	}

	for i, v := range s {
		if !v.Equal(o[i]) {
			return false
		}
	}

	return true
}

// Manager maintains the list of known Nomad servers and periodically
// rebalances which server the client talks to.
type Manager struct {
	// servers is the list of all known Nomad servers.
	servers Servers

	// rebalanceTimer controls the duration of the rebalance interval.
	rebalanceTimer *time.Timer

	// shutdownCh is a copy of the channel in Nomad.Client.
	shutdownCh chan struct{}

	logger *log.Logger

	// numNodes is used to estimate the approximate number of nodes in
	// a cluster and limit the rate at which it rebalances server
	// connections. It is read and written while holding the Manager's
	// lock.
	numNodes int32

	// connPoolPinger is used to test the health of a server in the
	// connection pool. Pinger is an interface that wraps client.ConnPool.
	connPoolPinger Pinger

	sync.Mutex
}

// New is the only way to safely create a new Manager struct.
func New(logger *log.Logger, shutdownCh chan struct{}, connPoolPinger Pinger) (m *Manager) {
	return &Manager{
		logger:         logger,
		connPoolPinger: connPoolPinger,
		rebalanceTimer: time.NewTimer(clientRPCMinReuseDuration),
		shutdownCh:     shutdownCh,
	}
}

// Start is used to start and manage the task of automatically shuffling and
// rebalancing the list of Nomad servers in order to distribute load across
// all known and available Nomad servers.
func (m *Manager) Start() {
	for {
		select {
		case <-m.rebalanceTimer.C:
			m.RebalanceServers()
			m.refreshServerRebalanceTimer()

		case <-m.shutdownCh:
			m.logger.Printf("[DEBUG] manager: shutting down")
			return
		}
	}
}

// SetServers sets the servers and reports whether the new server list is
// different from the existing server set.
func (m *Manager) SetServers(servers Servers) bool {
	m.Lock()
	defer m.Unlock()

	// Sort both the existing and incoming servers.
	servers.Sort()
	m.servers.Sort()

	// Determine if they are equal.
	equal := servers.Equal(m.servers)

	// Randomize the incoming servers.
	servers.shuffle()
	m.servers = servers

	return !equal
}

// FindServer returns a server to send an RPC to. If there are no servers, nil
// is returned.
func (m *Manager) FindServer() *Server {
	m.Lock()
	defer m.Unlock()

	if len(m.servers) == 0 {
		m.logger.Printf("[WARN] manager: No servers available")
		return nil
	}

	// Return whatever is at the front of the list because it is
	// assumed to be the oldest in the server list (unless -
	// hypothetically - the server list was rotated right after a
	// server was added).
	return m.servers[0]
}

// NumNodes returns the approximate number of nodes in the cluster.
func (m *Manager) NumNodes() int32 {
	m.Lock()
	defer m.Unlock()
	return m.numNodes
}

// SetNumNodes stores the approximate number of nodes in the cluster.
func (m *Manager) SetNumNodes(n int32) {
	m.Lock()
	defer m.Unlock()
	m.numNodes = n
}
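// The type and functions below are an illustrative sketch added to this
// listing and are not part of the upstream file. They show one way to satisfy
// the Pinger interface with a plain net.DialTimeout probe and how a
// hypothetical caller might wire up and start a Manager. In Nomad itself the
// Pinger is backed by the client's RPC connection pool rather than a raw
// dialer, and the node-count estimate comes from the cluster, not from
// len(servers).
type exampleDialPinger struct {
	timeout time.Duration
}

// Ping dials the server's address and treats a successful connection as
// healthy.
func (p exampleDialPinger) Ping(addr net.Addr) error {
	conn, err := net.DialTimeout(addr.Network(), addr.String(), p.timeout)
	if err != nil {
		return err
	}
	return conn.Close()
}

func exampleStartManager(logger *log.Logger, servers Servers) (*Manager, chan struct{}) {
	shutdownCh := make(chan struct{})
	m := New(logger, shutdownCh, exampleDialPinger{timeout: 2 * time.Second})

	// Seed the server list and a placeholder node-count estimate.
	m.SetServers(servers)
	m.SetNumNodes(int32(len(servers)))

	// Start blocks until shutdownCh is closed, so run it in its own
	// goroutine; it rebalances on every rebalanceTimer tick.
	go m.Start()
	return m, shutdownCh
}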
// NotifyFailedServer marks the passed in server as "failed" by rotating it
// to the end of the server list.
func (m *Manager) NotifyFailedServer(s *Server) {
	m.Lock()
	defer m.Unlock()

	// If the server being failed is not the first server on the list,
	// this is a noop. If, however, the server is failed and first on
	// the list, move the server to the end of the list.
	if len(m.servers) > 1 && m.servers[0].Equal(s) {
		m.servers.cycle()
	}
}

// NumServers returns the total number of known servers whether healthy or not.
func (m *Manager) NumServers() int {
	m.Lock()
	defer m.Unlock()
	return len(m.servers)
}

// GetServers returns a copy of the current list of servers.
func (m *Manager) GetServers() Servers {
	m.Lock()
	defer m.Unlock()

	copied := make([]*Server, 0, len(m.servers))
	for _, s := range m.servers {
		copied = append(copied, s.Copy())
	}

	return copied
}

// RebalanceServers shuffles the order in which Servers will be contacted. The
// function will shuffle the set of potential servers to contact and then attempt
// to contact each server. If a server successfully responds it is used, otherwise
// it is rotated such that it will be the last attempted server.
func (m *Manager) RebalanceServers() {
	// Shuffle servers so we have a chance of picking a new one.
	servers := m.GetServers()
	servers.shuffle()

	// Iterate through the shuffled server list to find an assumed
	// healthy server. NOTE: Do not range over the list directly because
	// this loop mutates the server list in-place.
	var foundHealthyServer bool
	for i := 0; i < len(servers); i++ {
		// Always test the first server. Failed servers are cycled
		// while Serf detects the node has failed.
		srv := servers[0]

		err := m.connPoolPinger.Ping(srv.Addr)
		if err == nil {
			foundHealthyServer = true
			break
		}
		m.logger.Printf(`[DEBUG] manager: pinging server "%s" failed: %s`, srv, err)

		servers.cycle()
	}

	if !foundHealthyServer {
		m.logger.Printf("[DEBUG] manager: No healthy servers during rebalance")
		return
	}

	// Save the servers.
	m.Lock()
	m.servers = servers
	m.Unlock()
}

// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	m.Lock()
	defer m.Unlock()
	numServers := len(m.servers)

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// clientRPCMinReuseDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across the cluster's
	// nodes.
	clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)

	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, clientRPCMinReuseDuration, int(m.numNodes))
	connRebalanceTimeout += lib.RandomStagger(connRebalanceTimeout)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}

// ResetRebalanceTimer resets the rebalance timer. This method exists for
// testing and should not be used directly.
func (m *Manager) ResetRebalanceTimer() {
	m.Lock()
	defer m.Unlock()
	m.rebalanceTimer.Reset(clientRPCMinReuseDuration)
}
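// The function below is an illustrative sketch added to this listing and is
// not part of the upstream file. It shows the call pattern a client's RPC
// layer typically follows around FindServer and NotifyFailedServer; the rpcFn
// parameter is a stand-in for the real RPC machinery, which lives outside
// this package.
func exampleRPCWithFailover(m *Manager, rpcFn func(srv *Server) error) error {
	var lastErr error
	for i := 0; i < m.NumServers(); i++ {
		srv := m.FindServer()
		if srv == nil {
			// No servers are known; a real caller would surface a
			// dedicated "no servers" error here.
			break
		}

		if err := rpcFn(srv); err != nil {
			// Rotate the failed server to the back of the list so the
			// next FindServer call returns a different candidate.
			m.NotifyFailedServer(srv)
			lastErr = err
			continue
		}
		return nil // the RPC succeeded against srv
	}
	return lastErr
}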