github.com/quite/nomad@v0.8.6/command/agent/retry_join.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"strings"
     7  	"time"
     8  )
     9  
    10  // DiscoverInterface is an interface for the Discover type in the go-discover
    11  // library. Using an interface allows for ease of testing.
    12  type DiscoverInterface interface {
    13  	// Addrs discovers ip addresses of nodes that match the given filter
    14  	// criteria.
    15  	// The config string must have the format 'provider=xxx key=val key=val ...'
    16  	// where the keys and values are provider specific. The values are URL
    17  	// encoded.
    18  	Addrs(string, *log.Logger) ([]string, error)
    19  
    20  	// Help describes the format of the configuration string for address
    21  	// discovery and the various provider specific options.
    22  	Help() string
    23  
    24  	// Names returns the names of the configured providers.
    25  	Names() []string
    26  }
    27  
// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted.
type retryJoiner struct {
	// serverJoin adds the specified servers to the serf cluster. RetryJoin
	// only calls it when serverEnabled is true and the function is non-nil,
	// and logs the returned int as the number of initial servers synced.
	serverJoin func([]string) (int, error)

	// serverEnabled indicates whether the nomad agent will run in server mode
	serverEnabled bool

	// clientJoin adds the specified servers to the serf cluster. RetryJoin
	// only calls it when clientEnabled is true and the function is non-nil,
	// and logs the returned int as the number of initial servers synced.
	// NOTE(review): the "serf cluster" wording mirrors serverJoin; confirm it
	// is accurate for clients.
	clientJoin func([]string) (int, error)

	// clientEnabled indicates whether the nomad agent will run in client mode
	clientEnabled bool

	// discover is of type Discover, where this is either the go-discover
	// implementation or a mock used for testing. RetryJoin uses it to expand
	// join entries that start with "provider=".
	discover DiscoverInterface

	// errCh is used to communicate with the agent when the max retry attempt
	// limit has been reached. RetryJoin signals this by closing the channel.
	errCh chan struct{}

	// logger is the agent logger.
	logger *log.Logger
}
    54  
    55  // Validate ensures that the configuration passes validity checks for the
    56  // retry_join stanza. If the configuration is not valid, returns an error that
    57  // will be displayed to the operator, otherwise nil.
    58  func (r *retryJoiner) Validate(config *Config) error {
    59  
    60  	// If retry_join is defined for the server, ensure that deprecated
    61  	// fields and the server_join stanza are not both set
    62  	if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 {
    63  		if len(config.Server.RetryJoin) != 0 {
    64  			return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join stanza")
    65  		}
    66  		if len(config.Server.StartJoin) != 0 {
    67  			return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza")
    68  		}
    69  		if config.Server.RetryMaxAttempts != 0 {
    70  			return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza")
    71  		}
    72  
    73  		if config.Server.RetryInterval != 0 {
    74  			return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza")
    75  		}
    76  
    77  		if len(config.Server.ServerJoin.StartJoin) != 0 {
    78  			return fmt.Errorf("retry_join and start_join cannot both be defined")
    79  		}
    80  	}
    81  
    82  	// if retry_join is defined for the client, ensure that start_join is not
    83  	// set as this configuration is only defined for servers.
    84  	if config.Client != nil && config.Client.ServerJoin != nil {
    85  		if config.Client.ServerJoin.StartJoin != nil {
    86  			return fmt.Errorf("start_join is not supported for Nomad clients")
    87  		}
    88  	}
    89  
    90  	return nil
    91  }
    92  
    93  // retryJoin is used to handle retrying a join until it succeeds or all retries
    94  // are exhausted.
    95  func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
    96  	if len(serverJoin.RetryJoin) == 0 {
    97  		return
    98  	}
    99  
   100  	attempt := 0
   101  
   102  	addrsToJoin := strings.Join(serverJoin.RetryJoin, " ")
   103  	r.logger.Printf("[INFO] agent: Joining cluster... %s", addrsToJoin)
   104  
   105  	for {
   106  		var addrs []string
   107  		var n int
   108  		var err error
   109  
   110  		for _, addr := range serverJoin.RetryJoin {
   111  			switch {
   112  			case strings.HasPrefix(addr, "provider="):
   113  				servers, err := r.discover.Addrs(addr, r.logger)
   114  				if err != nil {
   115  					r.logger.Printf("[ERR] agent: Join error %s", err)
   116  				} else {
   117  					addrs = append(addrs, servers...)
   118  				}
   119  			default:
   120  				addrs = append(addrs, addr)
   121  			}
   122  		}
   123  
   124  		if len(addrs) > 0 {
   125  			if r.serverEnabled && r.serverJoin != nil {
   126  				n, err = r.serverJoin(addrs)
   127  				if err == nil {
   128  					r.logger.Printf("[INFO] agent: Join completed. Server synced with %d initial servers", n)
   129  					return
   130  				}
   131  			}
   132  			if r.clientEnabled && r.clientJoin != nil {
   133  				n, err = r.clientJoin(addrs)
   134  				if err == nil {
   135  					r.logger.Printf("[INFO] agent: Join completed. Client synced with %d initial servers", n)
   136  					return
   137  				}
   138  			}
   139  		}
   140  
   141  		attempt++
   142  		if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts {
   143  			r.logger.Printf("[ERR] agent: max join retry exhausted, exiting")
   144  			close(r.errCh)
   145  			return
   146  		}
   147  
   148  		if err != nil {
   149  			r.logger.Printf("[WARN] agent: Join failed: %q, retrying in %v", err,
   150  				serverJoin.RetryInterval)
   151  		}
   152  		time.Sleep(serverJoin.RetryInterval)
   153  	}
   154  }