github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/client/servers/manager.go

// Package servers provides an interface for choosing Servers to communicate
// with from a Nomad Client perspective. The package does not provide any API
// guarantees and should be called only by `hashicorp/nomad`.
package servers

import (
	"log"
	"math/rand"
	"net"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/consul/lib"
)

const (
	// clientRPCMinReuseDuration controls the minimum amount of time RPC
	// queries are sent over an established connection to a single server.
	clientRPCMinReuseDuration = 5 * time.Minute

	// newRebalanceConnsPerSecPerServer limits the number of new connections
	// a server receives per second for connection rebalancing. This limit
	// caps the load caused by continual rebalancing efforts when a cluster
	// is in equilibrium. A lower value comes at the cost of increased
	// recovery time after a partition. This parameter begins to take effect
	// when there are more than ~48K clients querying 5x servers or at lower
	// server counts when there is a partition.
	//
	// For example, in a 100K Nomad cluster with 5x servers, it will take
	// ~5min for all servers to rebalance their connections. If 99,995
	// agents are in the minority talking to only one server, it will take
	// ~26min for all servers to rebalance. A 10K cluster in the same
	// scenario will take ~2.6min to rebalance.
	newRebalanceConnsPerSecPerServer = 64
)

// Pinger is an interface for pinging a server to see if it is healthy.
type Pinger interface {
	Ping(addr net.Addr) error
}

// Server contains the address of a server and metadata that can be used for
// choosing a server to contact.
type Server struct {
	// Addr is the resolved address of the server.
	Addr net.Addr
	addr string
	sync.Mutex

	// DC is the datacenter of the server.
	DC string
}

// Copy returns a copy of the server that is safe to use outside the lock.
func (s *Server) Copy() *Server {
	s.Lock()
	defer s.Unlock()

	return &Server{
		Addr: s.Addr,
		addr: s.addr,
		DC:   s.DC,
	}
}

// String returns the server's address, caching the rendered form.
func (s *Server) String() string {
	s.Lock()
	defer s.Unlock()

	if s.addr == "" {
		s.addr = s.Addr.String()
	}

	return s.addr
}

// Servers is a list of servers.
type Servers []*Server

func (s Servers) String() string {
	addrs := make([]string, 0, len(s))
	for _, srv := range s {
		addrs = append(addrs, srv.String())
	}
	return strings.Join(addrs, ",")
}

// cycle rotates the list of servers in-place, moving the first server to the
// end of the list.
func (s Servers) cycle() {
	numServers := len(s)
	if numServers < 2 {
		return // No action required
	}

	start := s[0]
	for i := 1; i < numServers; i++ {
		s[i-1] = s[i]
	}
	s[numServers-1] = start
}

// shuffle shuffles the server list in place.
func (s Servers) shuffle() {
	for i := len(s) - 1; i > 0; i-- {
		j := rand.Int31n(int32(i + 1))
		s[i], s[j] = s[j], s[i]
	}
}

// Manager maintains the list of known Nomad servers and periodically
// rebalances which server the client prefers to contact.
type Manager struct {
	// servers is the list of all known Nomad servers.
	servers Servers

	// rebalanceTimer controls the duration of the rebalance interval.
	rebalanceTimer *time.Timer

	// shutdownCh is a copy of the channel in Nomad.Client.
	shutdownCh chan struct{}

	logger *log.Logger

	// numNodes is used to estimate the approximate number of nodes in a
	// cluster and limit the rate at which it rebalances server
	// connections. It is read and written while holding the Manager's
	// lock.
	numNodes int32

	// connPoolPinger is used to test the health of a server in the
	// connection pool. Pinger is an interface that wraps client.ConnPool.
	connPoolPinger Pinger

	sync.Mutex
}

// New is the only way to safely create a new Manager struct.
func New(logger *log.Logger, shutdownCh chan struct{}, connPoolPinger Pinger) (m *Manager) {
	return &Manager{
		logger:         logger,
		connPoolPinger: connPoolPinger,
		rebalanceTimer: time.NewTimer(clientRPCMinReuseDuration),
		shutdownCh:     shutdownCh,
	}
}

// Start is used to start and manage the task of automatically shuffling and
// rebalancing the list of Nomad servers in order to distribute load across
// all known and available Nomad servers.
func (m *Manager) Start() {
	for {
		select {
		case <-m.rebalanceTimer.C:
			m.RebalanceServers()
			m.refreshServerRebalanceTimer()

		case <-m.shutdownCh:
			m.logger.Printf("[DEBUG] manager: shutting down")
			return
		}
	}
}

// SetServers replaces the list of known servers.
func (m *Manager) SetServers(servers Servers) {
	m.Lock()
	defer m.Unlock()
	m.servers = servers
}

// FindServer returns a server to send an RPC to. If there are no servers, nil
// is returned.
func (m *Manager) FindServer() *Server {
	m.Lock()
	defer m.Unlock()

	if len(m.servers) == 0 {
		m.logger.Printf("[WARN] manager: No servers available")
		return nil
	}

	// Return whatever is at the front of the list because it is assumed to
	// be the oldest in the server list (unless - hypothetically - the
	// server list was rotated right after a server was added).
	return m.servers[0]
}

// NumNodes returns the approximate number of nodes in the cluster.
func (m *Manager) NumNodes() int32 {
	m.Lock()
	defer m.Unlock()
	return m.numNodes
}

// SetNumNodes stores the approximate number of nodes in the cluster.
func (m *Manager) SetNumNodes(n int32) {
	m.Lock()
	defer m.Unlock()
	m.numNodes = n
}

// NotifyFailedServer marks the passed in server as "failed" by rotating it
// to the end of the server list.
func (m *Manager) NotifyFailedServer(s *Server) {
	m.Lock()
	defer m.Unlock()

	// If the server being failed is not the first server on the list,
	// this is a noop. If, however, the server is failed and first on
	// the list, move the server to the end of the list.
	if len(m.servers) > 1 && m.servers[0] == s {
		m.servers.cycle()
	}
}

// NumServers returns the total number of known servers whether healthy or not.
func (m *Manager) NumServers() int {
	m.Lock()
	defer m.Unlock()
	return len(m.servers)
}

// GetServers returns a copy of the current list of servers.
func (m *Manager) GetServers() Servers {
	m.Lock()
	defer m.Unlock()

	copied := make([]*Server, 0, len(m.servers))
	for _, s := range m.servers {
		copied = append(copied, s.Copy())
	}

	return copied
}

// RebalanceServers shuffles the order in which Servers will be contacted. The
// function will shuffle the set of potential servers to contact and then
// attempt to contact each server. If a server successfully responds it is
// used, otherwise it is rotated such that it will be the last attempted
// server.
func (m *Manager) RebalanceServers() {
	// Shuffle servers so we have a chance of picking a new one.
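	// Work on a snapshot from GetServers so the Manager lock is not held
	// while servers are pinged; the snapshot is written back under the
	// lock only once a healthy server has been found.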
	servers := m.GetServers()
	servers.shuffle()

	// Iterate through the shuffled server list to find an assumed healthy
	// server. NOTE: Do not iterate on the list directly because this loop
	// mutates the server list in-place.
	var foundHealthyServer bool
	for i := 0; i < len(servers); i++ {
		// Always test the first server. Failed servers are cycled
		// while Serf detects the node has failed.
		srv := servers[0]

		err := m.connPoolPinger.Ping(srv.Addr)
		if err == nil {
			foundHealthyServer = true
			break
		}
		m.logger.Printf(`[DEBUG] manager: pinging server "%s" failed: %s`, srv, err)

		servers.cycle()
	}

	if !foundHealthyServer {
		m.logger.Printf("[DEBUG] manager: No healthy servers during rebalance")
		return
	}

	// Save the rebalanced server list.
	m.Lock()
	m.servers = servers
	m.Unlock()
}

// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	m.Lock()
	defer m.Unlock()
	numServers := len(m.servers)

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// clientRPCMinReuseDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across the cluster's
	// nodes.
	clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)

	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, clientRPCMinReuseDuration, int(m.numNodes))
	connRebalanceTimeout += lib.RandomStagger(connRebalanceTimeout)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}

// ResetRebalanceTimer resets the rebalance timer. This method exists for
// testing and should not be used directly.
func (m *Manager) ResetRebalanceTimer() {
	m.Lock()
	defer m.Unlock()
	m.rebalanceTimer.Reset(clientRPCMinReuseDuration)
}
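// The sketch below is an editorial illustration, not part of the original
// file: it shows one plausible way a caller could wire up a Manager. The
// names nopPinger and exampleUsage, and the server address used, are
// assumptions made for the example only; in the real client the Pinger is
// backed by the RPC connection pool. Port 4647 is the default Nomad server
// RPC port.

// nopPinger is a trivial Pinger that treats every server as healthy.
type nopPinger struct{}

// Ping always reports success.
func (nopPinger) Ping(_ net.Addr) error { return nil }

// exampleUsage seeds a Manager with one server, starts the rebalance loop,
// and picks a server for the next RPC.
func exampleUsage(logger *log.Logger) {
	shutdownCh := make(chan struct{})
	m := New(logger, shutdownCh, nopPinger{})

	addr, err := net.ResolveTCPAddr("tcp", "10.0.0.1:4647")
	if err != nil {
		logger.Printf("[ERR] example: resolving server address: %v", err)
		return
	}
	m.SetServers(Servers{{Addr: addr, DC: "dc1"}})

	// Start blocks until shutdownCh is closed, so run it in its own
	// goroutine; it periodically shuffles and re-probes the server list.
	go m.Start()

	if srv := m.FindServer(); srv != nil {
		logger.Printf("[DEBUG] example: next RPC target: %s", srv)
	}

	close(shutdownCh) // stops the Start loop
}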