// Source: github.com/janma/nomad@v0.11.3/command/agent/testagent.go

package agent

import (
	"fmt"
	"io"
	"io/ioutil"
	"math/rand"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"strings"
	"time"

	testing "github.com/mitchellh/go-testing-interface"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/client/fingerprint"
	"github.com/hashicorp/nomad/helper/freeport"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/nomad"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	sconfig "github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/nomad/testutil"
)

// init seeds math/rand from the wall clock. Start draws its retry back-off
// from math/rand, so without the seed every test binary would sleep for the
// same sequence of durations.
func init() {
	rand.Seed(time.Now().UnixNano()) // seed random number generator
}

// TempDir defines the base dir for temporary directories. Start creates the
// managed data directory of a TestAgent underneath it.
var TempDir = os.TempDir()

// TestAgent encapsulates an Agent with a default configuration and startup
// procedure suitable for testing. It manages a temporary data directory which
// is removed after shutdown.
type TestAgent struct {
	// T is the testing object. It is used to log progress and to fail the
	// test when the agent cannot be started.
	T testing.T

	// Name is an optional name of the agent. When set it is used as the
	// node name of the default configuration, as the prefix of the managed
	// data directory, and as a prefix in log messages.
	Name string

	// ConfigCallback is an optional callback that allows modification of the
	// configuration before the agent is started. It is only invoked when the
	// default configuration is built, i.e. when Config is nil.
	ConfigCallback func(*Config)

	// Config is the agent configuration. If Config is nil then the
	// default test configuration built by config() is used. If
	// Config.DataDir is set then it is the callers responsibility to
	// clean up the data directory. Otherwise, a temporary data directory
	// is created and removed when Shutdown() is called.
	Config *Config

	// LogOutput is the sink for the logs. If nil, it is set to a writer
	// created by testlog.NewWriter(T) when the agent is started.
	LogOutput io.Writer

	// DataDir is the data directory which is used when Config.DataDir
	// is not set. It is created automatically and removed when
	// Shutdown() is called.
	DataDir string

	// Key is the optional encryption key for the keyring. When non-empty
	// it is written to the serf keyring file inside the data directory
	// before the agent is started.
	Key string

	// Server is a reference to the started HTTP endpoint.
	// It is valid after Start().
	Server *HTTPServer

	// Agent is the embedded Nomad agent.
	// It is valid after Start().
	*Agent

	// RootToken is auto-bootstrapped if ACLs are enabled. Start skips the
	// bootstrap when the agent is not a server or when ACL.PolicyTTL is 0.
	RootToken *structs.ACLToken

	// ports that are reserved through freeport that must be returned at
	// the end of a test, done when Shutdown() is called.
	ports []int

	// Enterprise specifies if the agent is enterprise or not.
	// NewTestAgent initializes it from EnterpriseTestAgent.
	Enterprise bool
}

// NewTestAgent returns a started agent with the given name and
// configuration. The caller should call Shutdown() to stop the agent and
// remove temporary directories.
//
// configCallback may be nil; when given, it can adjust the default
// configuration before the agent is started.
func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent {
	a := &TestAgent{
		T:              t,
		Name:           name,
		ConfigCallback: configCallback,
		Enterprise:     EnterpriseTestAgent,
	}

	a.Start()
	return a
}

// Start starts a test agent and returns the receiver. It fails the test if
// the agent was already started. When Config is nil a default configuration
// is built, and when Config.DataDir is empty a temporary data directory is
// created under TempDir. Startup is attempted with freshly picked ports and
// is retried a bounded number of times before the test is failed.
105 func (a *TestAgent) Start() *TestAgent { 106 if a.Agent != nil { 107 a.T.Fatalf("TestAgent already started") 108 } 109 if a.Config == nil { 110 a.Config = a.config() 111 } 112 if a.Config.DataDir == "" { 113 name := "agent" 114 if a.Name != "" { 115 name = a.Name + "-agent" 116 } 117 name = strings.Replace(name, "/", "_", -1) 118 d, err := ioutil.TempDir(TempDir, name) 119 if err != nil { 120 a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err) 121 } 122 a.DataDir = d 123 a.Config.DataDir = d 124 a.Config.NomadConfig.DataDir = d 125 } 126 127 i := 10 128 129 RETRY: 130 i-- 131 a.pickRandomPorts(a.Config) 132 if a.Config.NodeName == "" { 133 a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC) 134 } 135 136 // write the keyring 137 if a.Key != "" { 138 writeKey := func(key, filename string) { 139 path := filepath.Join(a.Config.DataDir, filename) 140 if err := initKeyring(path, key); err != nil { 141 a.T.Fatalf("Error creating keyring %s: %s", path, err) 142 } 143 } 144 writeKey(a.Key, serfKeyring) 145 } 146 147 // we need the err var in the next exit condition 148 agent, err := a.start() 149 if err == nil { 150 a.Agent = agent 151 } else if i == 0 { 152 a.T.Fatalf("%s: Error starting agent: %v", a.Name, err) 153 } else { 154 155 if agent != nil { 156 agent.Shutdown() 157 } 158 wait := time.Duration(rand.Int31n(2000)) * time.Millisecond 159 a.T.Logf("%s: retrying in %v", a.Name, wait) 160 time.Sleep(wait) 161 162 // Clean out the data dir if we are responsible for it before we 163 // try again, since the old ports may have gotten written to 164 // the data dir, such as in the Raft configuration. 
165 if a.DataDir != "" { 166 if err := os.RemoveAll(a.DataDir); err != nil { 167 a.T.Fatalf("%s: Error resetting data dir: %v", a.Name, err) 168 } 169 } 170 171 goto RETRY 172 } 173 174 failed := false 175 if a.Config.NomadConfig.BootstrapExpect == 1 && a.Config.Server.Enabled { 176 testutil.WaitForResult(func() (bool, error) { 177 args := &structs.GenericRequest{} 178 var leader string 179 err := a.RPC("Status.Leader", args, &leader) 180 return leader != "", err 181 }, func(err error) { 182 a.T.Logf("failed to find leader: %v", err) 183 failed = true 184 }) 185 } else { 186 testutil.WaitForResult(func() (bool, error) { 187 req, _ := http.NewRequest("GET", "/v1/agent/self", nil) 188 resp := httptest.NewRecorder() 189 _, err := a.Server.AgentSelfRequest(resp, req) 190 return err == nil && resp.Code == 200, err 191 }, func(err error) { 192 a.T.Logf("failed to find leader: %v", err) 193 failed = true 194 }) 195 } 196 if failed { 197 a.Agent.Shutdown() 198 if i == 0 { 199 a.T.Fatalf("ran out of retries trying to start test agent") 200 } 201 goto RETRY 202 } 203 204 // Check if ACLs enabled. Use special value of PolicyTTL 0s 205 // to do a bypass of this step. This is so we can test bootstrap 206 // without having to pass down a special flag. 
207 if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 { 208 a.RootToken = mock.ACLManagementToken() 209 state := a.Agent.server.State() 210 if err := state.BootstrapACLTokens(1, 0, a.RootToken); err != nil { 211 a.T.Fatalf("token bootstrap failed: %v", err) 212 } 213 } 214 return a 215 } 216 217 func (a *TestAgent) start() (*Agent, error) { 218 if a.LogOutput == nil { 219 a.LogOutput = testlog.NewWriter(a.T) 220 } 221 222 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 223 metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm) 224 225 if inm == nil { 226 return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization") 227 } 228 229 logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{ 230 Name: "agent", 231 Level: hclog.LevelFromString(a.Config.LogLevel), 232 Output: a.LogOutput, 233 JSONFormat: a.Config.LogJson, 234 }) 235 236 agent, err := NewAgent(a.Config, logger, a.LogOutput, inm) 237 if err != nil { 238 return nil, err 239 } 240 241 // Setup the HTTP server 242 http, err := NewHTTPServer(agent, a.Config) 243 if err != nil { 244 return agent, err 245 } 246 247 a.Server = http 248 return agent, nil 249 } 250 251 // Shutdown stops the agent and removes the data directory if it is 252 // managed by the test agent. 
253 func (a *TestAgent) Shutdown() error { 254 defer freeport.Return(a.ports) 255 256 defer func() { 257 if a.DataDir != "" { 258 os.RemoveAll(a.DataDir) 259 } 260 }() 261 262 // shutdown agent before endpoints 263 ch := make(chan error, 1) 264 go func() { 265 defer close(ch) 266 a.Server.Shutdown() 267 ch <- a.Agent.Shutdown() 268 }() 269 270 select { 271 case err := <-ch: 272 return err 273 case <-time.After(1 * time.Minute): 274 return fmt.Errorf("timed out while shutting down test agent") 275 } 276 } 277 278 func (a *TestAgent) HTTPAddr() string { 279 if a.Server == nil { 280 return "" 281 } 282 proto := "http://" 283 if a.Config.TLSConfig != nil && a.Config.TLSConfig.EnableHTTP { 284 proto = "https://" 285 } 286 return proto + a.Server.Addr 287 } 288 289 func (a *TestAgent) Client() *api.Client { 290 conf := api.DefaultConfig() 291 conf.Address = a.HTTPAddr() 292 c, err := api.NewClient(conf) 293 if err != nil { 294 a.T.Fatalf("Error creating Nomad API client: %s", err) 295 } 296 return c 297 } 298 299 // pickRandomPorts selects random ports from fixed size random blocks of 300 // ports. This does not eliminate the chance for port conflict but 301 // reduces it significantly with little overhead. Furthermore, asking 302 // the kernel for a random port by binding to port 0 prolongs the test 303 // execution (in our case +20sec) while also not fully eliminating the 304 // chance of port conflicts for concurrently executed test binaries. 305 // Instead of relying on one set of ports to be sufficient we retry 306 // starting the agent with different ports on port conflict. 307 func (a *TestAgent) pickRandomPorts(c *Config) { 308 ports := freeport.MustTake(3) 309 a.ports = append(a.ports, ports...) 
310 311 c.Ports.HTTP = ports[0] 312 c.Ports.RPC = ports[1] 313 c.Ports.Serf = ports[2] 314 315 // Clear out the advertise addresses such that through retries we 316 // re-normalize the addresses correctly instead of using the values from the 317 // last port selection that had a port conflict. 318 if c.AdvertiseAddrs != nil { 319 c.AdvertiseAddrs.HTTP = "" 320 c.AdvertiseAddrs.RPC = "" 321 c.AdvertiseAddrs.Serf = "" 322 } 323 324 if err := c.normalizeAddrs(); err != nil { 325 a.T.Fatalf("error normalizing config: %v", err) 326 } 327 } 328 329 // TestConfig returns a unique default configuration for testing an 330 // agent. 331 func (a *TestAgent) config() *Config { 332 conf := DevConfig(nil) 333 334 // Customize the server configuration 335 config := nomad.DefaultConfig() 336 conf.NomadConfig = config 337 338 // Set the name 339 conf.NodeName = a.Name 340 341 // Bind and set ports 342 conf.BindAddr = "127.0.0.1" 343 344 conf.Consul = sconfig.DefaultConsulConfig() 345 conf.Vault.Enabled = new(bool) 346 347 // Tighten the Serf timing 348 config.SerfConfig.MemberlistConfig.SuspicionMult = 2 349 config.SerfConfig.MemberlistConfig.RetransmitMult = 2 350 config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond 351 config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond 352 config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond 353 354 // Tighten the Raft timing 355 config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond 356 config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond 357 config.RaftConfig.ElectionTimeout = 40 * time.Millisecond 358 config.RaftTimeout = 500 * time.Millisecond 359 360 // Tighten the autopilot timing 361 config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond 362 config.ServerHealthInterval = 50 * time.Millisecond 363 config.AutopilotInterval = 100 * time.Millisecond 364 365 // Tighten the fingerprinter timeouts 366 if conf.Client.Options == nil { 367 
conf.Client.Options = make(map[string]string) 368 } 369 conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true" 370 371 if a.ConfigCallback != nil { 372 a.ConfigCallback(conf) 373 } 374 375 return conf 376 }