github.com/anuvu/nomad@v0.8.7-atom1/client/servers/manager.go

// Package servers provides an interface for choosing Servers to communicate
// with from a Nomad Client perspective.  The package does not provide any API
// guarantees and should be called only by `hashicorp/nomad`.
package servers

import (
	"log"
	"math/rand"
	"net"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/consul/lib"
)

const (
	// clientRPCMinReuseDuration controls the minimum amount of time RPC
	// queries are sent over an established connection to a single server
	clientRPCMinReuseDuration = 5 * time.Minute

	// Limit the number of new connections a server receives per second
	// for connection rebalancing.  This limit caps the load caused by
	// continual rebalancing efforts when a cluster is in equilibrium.  A
	// lower value comes at the cost of increased recovery time after a
	// partition.  This parameter begins to take effect when there are
	// more than ~48K clients querying 5x servers or at lower server
	// values when there is a partition.
	//
	// For example, in a 100K Nomad cluster with 5x servers, it will
	// take ~5min for all servers to rebalance their connections.  If
	// 99,995 agents are in the minority talking to only one server, it
	// will take ~26min for all servers to rebalance.  A 10K cluster in
	// the same scenario will take ~2.6min to rebalance.
	newRebalanceConnsPerSecPerServer = 64
)
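
// The figures in the comment above follow from a simple rate calculation:
// refreshServerRebalanceTimer (below) treats the cluster-wide budget as
// numServers * newRebalanceConnsPerSecPerServer new connections per second.
// rebalanceEstimate is not part of the original file; it is an illustrative
// sketch of that arithmetic only. For example, 5 servers allow 5*64 = 320
// connections/s, so 100,000 nodes rebalance in roughly 100000/320 = ~312s
// (~5.2min), while 99,995 agents behind a single reachable server need
// 99995/64 = ~1562s (~26min).
func rebalanceEstimate(numServers, numNodes int) time.Duration {
	if numServers == 0 {
		return 0
	}
	clusterWideConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)
	return time.Duration(float64(numNodes) / clusterWideConnsPerSec * float64(time.Second))
}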

// Pinger is an interface for pinging a server to see if it is healthy.
type Pinger interface {
	Ping(addr net.Addr) error
}
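
// In the Nomad client the Pinger is satisfied by client.ConnPool, as noted on
// the Manager struct below. tcpPinger is not part of the original file; it is
// a minimal illustrative implementation that assumes a successful TCP dial is
// an acceptable stand-in for a health check.
type tcpPinger struct {
	timeout time.Duration
}

// Ping dials the server address and immediately closes the connection,
// returning any dial error.
func (p tcpPinger) Ping(addr net.Addr) error {
	conn, err := net.DialTimeout(addr.Network(), addr.String(), p.timeout)
	if err != nil {
		return err
	}
	return conn.Close()
}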

// Server contains the address of a server and metadata that can be used for
// choosing a server to contact.
type Server struct {
	// Addr is the resolved address of the server
	Addr net.Addr
	addr string
	sync.Mutex

	// DC is the datacenter of the server
	DC string
}

func (s *Server) Copy() *Server {
	s.Lock()
	defer s.Unlock()

	return &Server{
		Addr: s.Addr,
		addr: s.addr,
		DC:   s.DC,
	}
}

func (s *Server) String() string {
	s.Lock()
	defer s.Unlock()

	if s.addr == "" {
		s.addr = s.Addr.String()
	}

	return s.addr
}

func (s *Server) Equal(o *Server) bool {
	if s == nil && o == nil {
		return true
	} else if s == nil && o != nil || s != nil && o == nil {
		return false
	}

	return s.Addr.String() == o.Addr.String() && s.DC == o.DC
}

type Servers []*Server

func (s Servers) String() string {
	addrs := make([]string, 0, len(s))
	for _, srv := range s {
		addrs = append(addrs, srv.String())
	}
	return strings.Join(addrs, ",")
}

// cycle cycles a list of servers in-place
func (s Servers) cycle() {
	numServers := len(s)
	if numServers < 2 {
		return // No action required
	}

	start := s[0]
	for i := 1; i < numServers; i++ {
		s[i-1] = s[i]
	}
	s[numServers-1] = start
}

// shuffle shuffles the server list in place
func (s Servers) shuffle() {
	for i := len(s) - 1; i > 0; i-- {
		j := rand.Int31n(int32(i + 1))
		s[i], s[j] = s[j], s[i]
	}
}

func (s Servers) Sort() {
	sort.Slice(s, func(i, j int) bool {
		a, b := s[i], s[j]
		if addr1, addr2 := a.Addr.String(), b.Addr.String(); addr1 == addr2 {
			return a.DC < b.DC
		} else {
			return addr1 < addr2
		}
	})
}

// Equal reports whether the two server lists are equal, including the ordering.
func (s Servers) Equal(o Servers) bool {
	if len(s) != len(o) {
		return false
	}

	for i, v := range s {
		if !v.Equal(o[i]) {
			return false
		}
	}

	return true
}

type Manager struct {
	// servers is the list of all known Nomad servers.
	servers Servers

	// rebalanceTimer controls the duration of the rebalance interval
	rebalanceTimer *time.Timer

	// shutdownCh is a copy of the channel in Nomad.Client
	shutdownCh chan struct{}

	logger *log.Logger

	// numNodes is used to estimate the approximate number of nodes in
	// a cluster and limit the rate at which it rebalances server
	// connections. Reads and writes are protected by the Manager's lock
	// rather than atomics.
	numNodes int32

	// connPoolPinger is used to test the health of a server in the connection
	// pool. Pinger is an interface that wraps client.ConnPool.
	connPoolPinger Pinger

	sync.Mutex
}

// New is the only way to safely create a new Manager struct.
func New(logger *log.Logger, shutdownCh chan struct{}, connPoolPinger Pinger) (m *Manager) {
	return &Manager{
		logger:         logger,
		connPoolPinger: connPoolPinger,
		rebalanceTimer: time.NewTimer(clientRPCMinReuseDuration),
		shutdownCh:     shutdownCh,
	}
}

// Start is used to start and manage the task of automatically shuffling and
// rebalancing the list of Nomad servers in order to distribute load across
// all known and available Nomad servers.
func (m *Manager) Start() {
	for {
		select {
		case <-m.rebalanceTimer.C:
			m.RebalanceServers()
			m.refreshServerRebalanceTimer()

		case <-m.shutdownCh:
			m.logger.Printf("[DEBUG] manager: shutting down")
			return
		}
	}
}

// SetServers sets the servers and reports whether the new server list differs
// from the existing one.
func (m *Manager) SetServers(servers Servers) bool {
	m.Lock()
	defer m.Unlock()

	// Sort both the existing and incoming servers
	servers.Sort()
	m.servers.Sort()

	// Determine if they are equal
	equal := servers.Equal(m.servers)

	// Randomize the incoming servers
	servers.shuffle()
	m.servers = servers

	return !equal
}

// FindServer returns a server to send an RPC to. If there are no servers, nil
// is returned.
func (m *Manager) FindServer() *Server {
	m.Lock()
	defer m.Unlock()

	if len(m.servers) == 0 {
		m.logger.Printf("[WARN] manager: No servers available")
		return nil
	}

	// Return whatever is at the front of the list because it is
	// assumed to be the oldest in the server list (unless -
	// hypothetically - the server list was rotated right after a
	// server was added).
	return m.servers[0]
}

// NumNodes returns the approximate number of nodes in the cluster.
func (m *Manager) NumNodes() int32 {
	m.Lock()
	defer m.Unlock()
	return m.numNodes
}

// SetNumNodes stores the approximate number of nodes in the cluster.
func (m *Manager) SetNumNodes(n int32) {
	m.Lock()
	defer m.Unlock()
	m.numNodes = n
}

// NotifyFailedServer marks the passed-in server as "failed" by rotating it
// to the end of the server list.
func (m *Manager) NotifyFailedServer(s *Server) {
	m.Lock()
	defer m.Unlock()

	// If the server being failed is not the first server on the list,
	// this is a noop.  If, however, the server is failed and first on
	// the list, move the server to the end of the list.
	if len(m.servers) > 1 && m.servers[0].Equal(s) {
		m.servers.cycle()
	}
}

// NumServers returns the total number of known servers, whether healthy or not.
func (m *Manager) NumServers() int {
	m.Lock()
	defer m.Unlock()
	return len(m.servers)
}

// GetServers returns a copy of the current list of servers.
func (m *Manager) GetServers() Servers {
	m.Lock()
	defer m.Unlock()

	servers := make([]*Server, 0, len(m.servers))
	for _, s := range m.servers {
		servers = append(servers, s.Copy())
	}

	return servers
}

// RebalanceServers shuffles the order in which Servers will be contacted. The
// function will shuffle the set of potential servers to contact and then attempt
// to contact each server. If a server successfully responds it is used, otherwise
// it is rotated such that it will be the last attempted server.
func (m *Manager) RebalanceServers() {
	// Shuffle servers so we have a chance of picking a new one.
	servers := m.GetServers()
	servers.shuffle()

	// Iterate through the shuffled server list to find an assumed
	// healthy server.  NOTE: Do not iterate on the list directly because
	// this loop mutates the server list in-place.
	var foundHealthyServer bool
	for i := 0; i < len(servers); i++ {
		// Always test the first server.  Failed servers are cycled
		// while Serf detects the node has failed.
		srv := servers[0]

		err := m.connPoolPinger.Ping(srv.Addr)
		if err == nil {
			foundHealthyServer = true
			break
		}
		m.logger.Printf(`[DEBUG] manager: pinging server "%s" failed: %s`, srv, err)

		servers.cycle()
	}

	if !foundHealthyServer {
		m.logger.Printf("[DEBUG] manager: No healthy servers during rebalance")
		return
	}

	// Save the servers
	m.Lock()
	m.servers = servers
	m.Unlock()
}

// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	m.Lock()
	defer m.Unlock()
	numServers := len(m.servers)

	// Limit this connection's life based on the size (and health) of the
	// cluster.  Never rebalance a connection more frequently than
	// clientRPCMinReuseDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across all nodes.
	clusterWideRebalanceConnsPerSec := float64(numServers * newRebalanceConnsPerSecPerServer)

	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, clientRPCMinReuseDuration, int(m.numNodes))
	connRebalanceTimeout += lib.RandomStagger(connRebalanceTimeout)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}

// ResetRebalanceTimer resets the rebalance timer.  This method exists for
// testing and should not be used directly.
func (m *Manager) ResetRebalanceTimer() {
	m.Lock()
	defer m.Unlock()
	m.rebalanceTimer.Reset(clientRPCMinReuseDuration)
}
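
// Everything below is not part of the original file. exampleManagerUsage is a
// hedged sketch of how a caller might wire the Manager together, reusing the
// illustrative tcpPinger defined earlier; the address, datacenter, logger
// prefix, and timeout are placeholder values.

// discardWriter is a throwaway io.Writer for the sketch's logger.
type discardWriter struct{}

func (discardWriter) Write(p []byte) (int, error) { return len(p), nil }

func exampleManagerUsage() {
	logger := log.New(discardWriter{}, "[example] manager: ", log.LstdFlags)
	shutdownCh := make(chan struct{})

	// Build the manager and run the rebalance loop in the background.
	m := New(logger, shutdownCh, tcpPinger{timeout: 3 * time.Second})
	go m.Start()

	// Seed the manager with a resolved server address.
	if addr, err := net.ResolveTCPAddr("tcp", "10.0.0.1:4647"); err == nil {
		m.SetServers(Servers{{Addr: addr, DC: "dc1"}})
	}

	// Pick a server for the next RPC; on failure, rotate it to the back of
	// the list so the next call tries a different server.
	if srv := m.FindServer(); srv != nil {
		m.NotifyFailedServer(srv)
	}

	// Closing the shutdown channel stops the Start loop.
	close(shutdownCh)
}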