github.com/manicqin/nomad@v0.9.5/command/agent/retry_join.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  	"time"
     7  
     8  	golog "log"
     9  
    10  	log "github.com/hashicorp/go-hclog"
    11  )
    12  
    13  // DiscoverInterface is an interface for the Discover type in the go-discover
    14  // library. Using an interface allows for ease of testing.
    15  type DiscoverInterface interface {
    16  	// Addrs discovers ip addresses of nodes that match the given filter
    17  	// criteria.
    18  	// The config string must have the format 'provider=xxx key=val key=val ...'
    19  	// where the keys and values are provider specific. The values are URL
    20  	// encoded.
    21  	Addrs(string, *golog.Logger) ([]string, error)
    22  
    23  	// Help describes the format of the configuration string for address
    24  	// discovery and the various provider specific options.
    25  	Help() string
    26  
    27  	// Names returns the names of the configured providers.
    28  	Names() []string
    29  }
    30  
// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted. The retry loop itself lives in RetryJoin; this
// struct only carries the callbacks and collaborators that loop needs.
type retryJoiner struct {
	// serverJoin adds the specified servers to the serf cluster. RetryJoin
	// only calls it when serverEnabled is true and the callback is non-nil;
	// the returned int is logged as the number of initial servers.
	serverJoin func([]string) (int, error)

	// serverEnabled indicates whether the nomad agent will run in server mode
	serverEnabled bool

	// clientJoin is the client-mode counterpart of serverJoin. RetryJoin only
	// calls it when clientEnabled is true and the callback is non-nil.
	// NOTE(review): the previous comment said this also joins the serf
	// cluster; that is not visible from this file — confirm against the
	// caller that wires this callback.
	clientJoin func([]string) (int, error)

	// clientEnabled indicates whether the nomad agent will run in client mode
	clientEnabled bool

	// discover is of type Discover, where this is either the go-discover
	// implementation or a mock used for testing. It is consulted for every
	// retry_join entry that starts with "provider=".
	discover DiscoverInterface

	// errCh is used to communicate with the agent when the max retry attempt
	// limit has been reached. RetryJoin signals this by closing the channel;
	// nothing is ever sent on it.
	errCh chan struct{}

	// logger is the retry joiner's logger
	logger log.Logger
}
    57  
    58  // Validate ensures that the configuration passes validity checks for the
    59  // retry_join stanza. If the configuration is not valid, returns an error that
    60  // will be displayed to the operator, otherwise nil.
    61  func (r *retryJoiner) Validate(config *Config) error {
    62  
    63  	// If retry_join is defined for the server, ensure that deprecated
    64  	// fields and the server_join stanza are not both set
    65  	if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 {
    66  		if len(config.Server.RetryJoin) != 0 {
    67  			return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join stanza")
    68  		}
    69  		if len(config.Server.StartJoin) != 0 {
    70  			return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza")
    71  		}
    72  		if config.Server.RetryMaxAttempts != 0 {
    73  			return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza")
    74  		}
    75  
    76  		if config.Server.RetryInterval != 0 {
    77  			return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza")
    78  		}
    79  
    80  		if len(config.Server.ServerJoin.StartJoin) != 0 {
    81  			return fmt.Errorf("retry_join and start_join cannot both be defined")
    82  		}
    83  	}
    84  
    85  	// if retry_join is defined for the client, ensure that start_join is not
    86  	// set as this configuration is only defined for servers.
    87  	if config.Client != nil && config.Client.ServerJoin != nil {
    88  		if config.Client.ServerJoin.StartJoin != nil {
    89  			return fmt.Errorf("start_join is not supported for Nomad clients")
    90  		}
    91  	}
    92  
    93  	return nil
    94  }
    95  
    96  // retryJoin is used to handle retrying a join until it succeeds or all retries
    97  // are exhausted.
    98  func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
    99  	if len(serverJoin.RetryJoin) == 0 {
   100  		return
   101  	}
   102  
   103  	attempt := 0
   104  
   105  	addrsToJoin := strings.Join(serverJoin.RetryJoin, " ")
   106  	r.logger.Info("starting retry join", "servers", addrsToJoin)
   107  
   108  	standardLogger := r.logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true})
   109  	for {
   110  		var addrs []string
   111  		var n int
   112  		var err error
   113  
   114  		for _, addr := range serverJoin.RetryJoin {
   115  			switch {
   116  			case strings.HasPrefix(addr, "provider="):
   117  				servers, err := r.discover.Addrs(addr, standardLogger)
   118  				if err != nil {
   119  					r.logger.Error("determining join addresses failed", "error", err)
   120  				} else {
   121  					addrs = append(addrs, servers...)
   122  				}
   123  			default:
   124  				addrs = append(addrs, addr)
   125  			}
   126  		}
   127  
   128  		if len(addrs) > 0 {
   129  			if r.serverEnabled && r.serverJoin != nil {
   130  				n, err = r.serverJoin(addrs)
   131  				if err == nil {
   132  					r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "server")
   133  					return
   134  				}
   135  			}
   136  			if r.clientEnabled && r.clientJoin != nil {
   137  				n, err = r.clientJoin(addrs)
   138  				if err == nil {
   139  					r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "client")
   140  					return
   141  				}
   142  			}
   143  		}
   144  
   145  		attempt++
   146  		if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts {
   147  			r.logger.Error("max join retry exhausted, exiting")
   148  			close(r.errCh)
   149  			return
   150  		}
   151  
   152  		if err != nil {
   153  			r.logger.Warn("join failed", "error", err, "retry", serverJoin.RetryInterval)
   154  		}
   155  		time.Sleep(serverJoin.RetryInterval)
   156  	}
   157  }