package agent

import (
	"fmt"
	"strings"
	"time"

	golog "log"

	log "github.com/hashicorp/go-hclog"
)

// DiscoverInterface is an interface for the Discover type in the go-discover
// library. Using an interface allows for ease of testing.
type DiscoverInterface interface {
	// Addrs discovers ip addresses of nodes that match the given filter
	// criteria.
	// The config string must have the format 'provider=xxx key=val key=val ...'
	// where the keys and values are provider specific. The values are URL
	// encoded.
	Addrs(string, *golog.Logger) ([]string, error)

	// Help describes the format of the configuration string for address
	// discovery and the various provider specific options.
	Help() string

	// Names returns the names of the configured providers.
	Names() []string
}

// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted.
type retryJoiner struct {
	// serverJoin adds the specified servers to the serf cluster. RetryJoin
	// logs the returned int as "initial_servers" when the error is nil.
	serverJoin func([]string) (int, error)

	// serverEnabled indicates whether the nomad agent will run in server mode
	serverEnabled bool

	// clientJoin adds the specified servers to the serf cluster. RetryJoin
	// logs the returned int as "initial_servers" when the error is nil.
	// NOTE(review): this wording is identical to serverJoin's; confirm that
	// it accurately describes what a client-side join does.
	clientJoin func([]string) (int, error)

	// clientEnabled indicates whether the nomad agent will run in client mode
	clientEnabled bool

	// discover is of type Discover, where this is either the go-discover
	// implementation or a mock used for testing
	discover DiscoverInterface

	// errCh is used to communicate with the agent when the max retry attempt
	// limit has been reached; RetryJoin closes it in that case.
	errCh chan struct{}

	// logger is the retry joiner's logger
	logger log.Logger
}
If the configuration is not valid, returns an error that 60 // will be displayed to the operator, otherwise nil. 61 func (r *retryJoiner) Validate(config *Config) error { 62 63 // If retry_join is defined for the server, ensure that deprecated 64 // fields and the server_join stanza are not both set 65 if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 { 66 if len(config.Server.RetryJoin) != 0 { 67 return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join stanza") 68 } 69 if len(config.Server.StartJoin) != 0 { 70 return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza") 71 } 72 if config.Server.RetryMaxAttempts != 0 { 73 return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza") 74 } 75 76 if config.Server.RetryInterval != 0 { 77 return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza") 78 } 79 80 if len(config.Server.ServerJoin.StartJoin) != 0 { 81 return fmt.Errorf("retry_join and start_join cannot both be defined") 82 } 83 } 84 85 // if retry_join is defined for the client, ensure that start_join is not 86 // set as this configuration is only defined for servers. 87 if config.Client != nil && config.Client.ServerJoin != nil { 88 if config.Client.ServerJoin.StartJoin != nil { 89 return fmt.Errorf("start_join is not supported for Nomad clients") 90 } 91 } 92 93 return nil 94 } 95 96 // retryJoin is used to handle retrying a join until it succeeds or all retries 97 // are exhausted. 
98 func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { 99 if len(serverJoin.RetryJoin) == 0 { 100 return 101 } 102 103 attempt := 0 104 105 addrsToJoin := strings.Join(serverJoin.RetryJoin, " ") 106 r.logger.Info("starting retry join", "servers", addrsToJoin) 107 108 standardLogger := r.logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true}) 109 for { 110 var addrs []string 111 var n int 112 var err error 113 114 for _, addr := range serverJoin.RetryJoin { 115 switch { 116 case strings.HasPrefix(addr, "provider="): 117 servers, err := r.discover.Addrs(addr, standardLogger) 118 if err != nil { 119 r.logger.Error("determining join addresses failed", "error", err) 120 } else { 121 addrs = append(addrs, servers...) 122 } 123 default: 124 addrs = append(addrs, addr) 125 } 126 } 127 128 if len(addrs) > 0 { 129 if r.serverEnabled && r.serverJoin != nil { 130 n, err = r.serverJoin(addrs) 131 if err == nil { 132 r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "server") 133 return 134 } 135 } 136 if r.clientEnabled && r.clientJoin != nil { 137 n, err = r.clientJoin(addrs) 138 if err == nil { 139 r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "client") 140 return 141 } 142 } 143 } 144 145 attempt++ 146 if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts { 147 r.logger.Error("max join retry exhausted, exiting") 148 close(r.errCh) 149 return 150 } 151 152 if err != nil { 153 r.logger.Warn("join failed", "error", err, "retry", serverJoin.RetryInterval) 154 } 155 time.Sleep(serverJoin.RetryInterval) 156 } 157 }