// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package agent

import (
	"fmt"
	golog "log"
	"strings"
	"time"

	log "github.com/hashicorp/go-hclog"
)

// DiscoverInterface is an interface for the Discover type in the go-discover
// library. Using an interface allows for ease of testing: retryJoiner holds a
// DiscoverInterface, so either the go-discover implementation or a mock can be
// supplied.
type DiscoverInterface interface {
	// Addrs discovers ip addresses of nodes that match the given filter
	// criteria.
	// The config string must have the format 'provider=xxx key=val key=val ...'
	// where the keys and values are provider specific. The values are URL
	// encoded.
	//
	// The first argument is the config string; the second is a standard
	// library logger handed to the implementation. It returns the discovered
	// addresses, or an error.
	Addrs(string, *golog.Logger) ([]string, error)

	// Help describes the format of the configuration string for address
	// discovery and the various provider specific options.
	Help() string

	// Names returns the names of the configured providers.
	Names() []string
}

// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted.
type retryJoiner struct {
	// serverJoin adds the specified servers to the serf cluster. It returns
	// the number of servers joined and an error. RetryJoin only calls it when
	// serverEnabled is true and the function is non-nil.
	serverJoin func([]string) (int, error)

	// serverEnabled indicates whether the nomad agent will run in server mode
	serverEnabled bool

	// clientJoin adds the specified servers to the serf cluster. It returns
	// the number of servers joined and an error. RetryJoin only calls it when
	// clientEnabled is true and the function is non-nil.
	clientJoin func([]string) (int, error)

	// clientEnabled indicates whether the nomad agent will run in client mode
	clientEnabled bool

	// discover is of type Discover, where this is either the go-discover
	// implementation or a mock used for testing. RetryJoin uses it to resolve
	// join entries that start with "provider=" into addresses.
	discover DiscoverInterface

	// errCh is used to communicate with the agent when the max retry attempt
	// limit has been reached. RetryJoin closes the channel in that case; it
	// never sends a value on it.
	errCh chan struct{}

	// logger is the retry joiner's logger
	logger log.Logger
}

// Validate ensures that the configuration passes validity checks for the
// retry_join block. If the configuration is not valid, returns an error that
// will be displayed to the operator, otherwise nil.
63 func (r *retryJoiner) Validate(config *Config) error { 64 65 // If retry_join is defined for the server, ensure that deprecated 66 // fields and the server_join block are not both set 67 if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 { 68 if len(config.Server.RetryJoin) != 0 { 69 return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join block") 70 } 71 if len(config.Server.StartJoin) != 0 { 72 return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join block") 73 } 74 if config.Server.RetryMaxAttempts != 0 { 75 return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join block") 76 } 77 78 if config.Server.RetryInterval != 0 { 79 return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join block") 80 } 81 82 if len(config.Server.ServerJoin.StartJoin) != 0 { 83 return fmt.Errorf("retry_join and start_join cannot both be defined") 84 } 85 } 86 87 // if retry_join is defined for the client, ensure that start_join is not 88 // set as this configuration is only defined for servers. 89 if config.Client != nil && config.Client.ServerJoin != nil { 90 if config.Client.ServerJoin.StartJoin != nil { 91 return fmt.Errorf("start_join is not supported for Nomad clients") 92 } 93 } 94 95 return nil 96 } 97 98 // retryJoin is used to handle retrying a join until it succeeds or all retries 99 // are exhausted. 
100 func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { 101 if len(serverJoin.RetryJoin) == 0 { 102 return 103 } 104 105 attempt := 0 106 107 addrsToJoin := strings.Join(serverJoin.RetryJoin, " ") 108 r.logger.Info("starting retry join", "servers", addrsToJoin) 109 110 standardLogger := r.logger.StandardLogger(&log.StandardLoggerOptions{InferLevels: true}) 111 for { 112 var addrs []string 113 var n int 114 var err error 115 116 for _, addr := range serverJoin.RetryJoin { 117 switch { 118 case strings.HasPrefix(addr, "provider="): 119 servers, err := r.discover.Addrs(addr, standardLogger) 120 if err != nil { 121 r.logger.Error("determining join addresses failed", "error", err) 122 } else { 123 addrs = append(addrs, servers...) 124 } 125 default: 126 addrs = append(addrs, addr) 127 } 128 } 129 130 if len(addrs) > 0 { 131 if r.serverEnabled && r.serverJoin != nil { 132 n, err = r.serverJoin(addrs) 133 if err == nil { 134 r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "server") 135 return 136 } 137 } 138 if r.clientEnabled && r.clientJoin != nil { 139 n, err = r.clientJoin(addrs) 140 if err == nil { 141 r.logger.Info("retry join completed", "initial_servers", n, "agent_mode", "client") 142 return 143 } 144 } 145 } 146 147 attempt++ 148 if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts { 149 r.logger.Error("max join retry exhausted, exiting") 150 close(r.errCh) 151 return 152 } 153 154 if err != nil { 155 r.logger.Warn("join failed", "error", err, "retry", serverJoin.RetryInterval) 156 } 157 time.Sleep(serverJoin.RetryInterval) 158 } 159 }