// Source: github.com/quite/nomad@v0.8.6/command/agent/retry_join.go

package agent

import (
	"fmt"
	"log"
	"strings"
	"time"
)

// DiscoverInterface is an interface for the Discover type in the go-discover
// library. Using an interface allows the real implementation to be swapped
// for a mock in tests.
type DiscoverInterface interface {
	// Addrs discovers IP addresses of nodes that match the given filter
	// criteria.
	// The config string must have the format 'provider=xxx key=val key=val ...'
	// where the keys and values are provider specific. The values are URL
	// encoded.
	Addrs(string, *log.Logger) ([]string, error)

	// Help describes the format of the configuration string for address
	// discovery and the various provider specific options.
	Help() string

	// Names returns the names of the configured providers.
	Names() []string
}

// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted.
type retryJoiner struct {
	// serverJoin adds the specified servers to the serf cluster. RetryJoin
	// only calls it when serverEnabled is true and the function is non-nil;
	// the returned count is logged as the number of initial servers.
	serverJoin func([]string) (int, error)

	// serverEnabled indicates whether the nomad agent will run in server mode
	serverEnabled bool

	// clientJoin is called with the resolved server addresses. RetryJoin only
	// calls it when clientEnabled is true and the function is non-nil; the
	// returned count is logged as the number of initial servers.
	// NOTE(review): the previous comment said this joins the serf cluster,
	// which looks copied from serverJoin — confirm what the client join does.
	clientJoin func([]string) (int, error)

	// clientEnabled indicates whether the nomad agent will run in client mode
	clientEnabled bool

	// discover resolves "provider=..." entries into addresses. It is a
	// DiscoverInterface, so it can be either the go-discover implementation
	// or a mock used for testing.
	discover DiscoverInterface

	// errCh is used to communicate with the agent when the max retry attempt
	// limit has been reached; RetryJoin closes it at that point.
	errCh chan struct{}

	// logger is the agent logger.
	logger *log.Logger
}

// Validate ensures that the configuration passes validity checks for the
// retry_join stanza. If the configuration is not valid, returns an error that
// will be displayed to the operator, otherwise nil.
58 func (r *retryJoiner) Validate(config *Config) error { 59 60 // If retry_join is defined for the server, ensure that deprecated 61 // fields and the server_join stanza are not both set 62 if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 { 63 if len(config.Server.RetryJoin) != 0 { 64 return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join stanza") 65 } 66 if len(config.Server.StartJoin) != 0 { 67 return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza") 68 } 69 if config.Server.RetryMaxAttempts != 0 { 70 return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza") 71 } 72 73 if config.Server.RetryInterval != 0 { 74 return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza") 75 } 76 77 if len(config.Server.ServerJoin.StartJoin) != 0 { 78 return fmt.Errorf("retry_join and start_join cannot both be defined") 79 } 80 } 81 82 // if retry_join is defined for the client, ensure that start_join is not 83 // set as this configuration is only defined for servers. 84 if config.Client != nil && config.Client.ServerJoin != nil { 85 if config.Client.ServerJoin.StartJoin != nil { 86 return fmt.Errorf("start_join is not supported for Nomad clients") 87 } 88 } 89 90 return nil 91 } 92 93 // retryJoin is used to handle retrying a join until it succeeds or all retries 94 // are exhausted. 95 func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) { 96 if len(serverJoin.RetryJoin) == 0 { 97 return 98 } 99 100 attempt := 0 101 102 addrsToJoin := strings.Join(serverJoin.RetryJoin, " ") 103 r.logger.Printf("[INFO] agent: Joining cluster... 
%s", addrsToJoin) 104 105 for { 106 var addrs []string 107 var n int 108 var err error 109 110 for _, addr := range serverJoin.RetryJoin { 111 switch { 112 case strings.HasPrefix(addr, "provider="): 113 servers, err := r.discover.Addrs(addr, r.logger) 114 if err != nil { 115 r.logger.Printf("[ERR] agent: Join error %s", err) 116 } else { 117 addrs = append(addrs, servers...) 118 } 119 default: 120 addrs = append(addrs, addr) 121 } 122 } 123 124 if len(addrs) > 0 { 125 if r.serverEnabled && r.serverJoin != nil { 126 n, err = r.serverJoin(addrs) 127 if err == nil { 128 r.logger.Printf("[INFO] agent: Join completed. Server synced with %d initial servers", n) 129 return 130 } 131 } 132 if r.clientEnabled && r.clientJoin != nil { 133 n, err = r.clientJoin(addrs) 134 if err == nil { 135 r.logger.Printf("[INFO] agent: Join completed. Client synced with %d initial servers", n) 136 return 137 } 138 } 139 } 140 141 attempt++ 142 if serverJoin.RetryMaxAttempts > 0 && attempt > serverJoin.RetryMaxAttempts { 143 r.logger.Printf("[ERR] agent: max join retry exhausted, exiting") 144 close(r.errCh) 145 return 146 } 147 148 if err != nil { 149 r.logger.Printf("[WARN] agent: Join failed: %q, retrying in %v", err, 150 serverJoin.RetryInterval) 151 } 152 time.Sleep(serverJoin.RetryInterval) 153 } 154 }