github.com/prestonp/nomad@v0.10.4/command/agent/testagent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "math/rand" 8 "net/http" 9 "net/http/httptest" 10 "os" 11 "path/filepath" 12 "strings" 13 "time" 14 15 testing "github.com/mitchellh/go-testing-interface" 16 17 metrics "github.com/armon/go-metrics" 18 "github.com/hashicorp/go-hclog" 19 "github.com/hashicorp/nomad/api" 20 "github.com/hashicorp/nomad/client/fingerprint" 21 "github.com/hashicorp/nomad/helper/freeport" 22 "github.com/hashicorp/nomad/helper/testlog" 23 "github.com/hashicorp/nomad/nomad" 24 "github.com/hashicorp/nomad/nomad/mock" 25 "github.com/hashicorp/nomad/nomad/structs" 26 sconfig "github.com/hashicorp/nomad/nomad/structs/config" 27 "github.com/hashicorp/nomad/testutil" 28 ) 29 30 func init() { 31 rand.Seed(time.Now().UnixNano()) // seed random number generator 32 } 33 34 // TempDir defines the base dir for temporary directories. 35 var TempDir = os.TempDir() 36 37 // TestAgent encapsulates an Agent with a default configuration and startup 38 // procedure suitable for testing. It manages a temporary data directory which 39 // is removed after shutdown. 40 type TestAgent struct { 41 // T is the testing object 42 T testing.T 43 44 // Name is an optional name of the agent. 45 Name string 46 47 // ConfigCallback is an optional callback that allows modification of the 48 // configuration before the agent is started. 49 ConfigCallback func(*Config) 50 51 // Config is the agent configuration. If Config is nil then 52 // TestConfig() is used. If Config.DataDir is set then it is 53 // the callers responsibility to clean up the data directory. 54 // Otherwise, a temporary data directory is created and removed 55 // when Shutdown() is called. 56 Config *Config 57 58 // LogOutput is the sink for the logs. If nil, logs are written 59 // to os.Stderr. 60 LogOutput io.Writer 61 62 // DataDir is the data directory which is used when Config.DataDir 63 // is not set. It is created automatically and removed when 64 // Shutdown() is called. 65 DataDir string 66 67 // Key is the optional encryption key for the keyring. 68 Key string 69 70 // Server is a reference to the started HTTP endpoint. 71 // It is valid after Start(). 72 Server *HTTPServer 73 74 // Agent is the embedded Nomad agent. 75 // It is valid after Start(). 76 *Agent 77 78 // RootToken is auto-bootstrapped if ACLs are enabled 79 RootToken *structs.ACLToken 80 81 // ports that are reserved through freeport that must be returned at 82 // the end of a test, done when Shutdown() is called. 83 ports []int 84 } 85 86 // NewTestAgent returns a started agent with the given name and 87 // configuration. The caller should call Shutdown() to stop the agent and 88 // remove temporary directories. 89 func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent { 90 a := &TestAgent{ 91 T: t, 92 Name: name, 93 ConfigCallback: configCallback, 94 } 95 96 a.Start() 97 return a 98 } 99 100 // Start starts a test agent. 101 func (a *TestAgent) Start() *TestAgent { 102 if a.Agent != nil { 103 a.T.Fatalf("TestAgent already started") 104 } 105 if a.Config == nil { 106 a.Config = a.config() 107 } 108 if a.Config.DataDir == "" { 109 name := "agent" 110 if a.Name != "" { 111 name = a.Name + "-agent" 112 } 113 name = strings.Replace(name, "/", "_", -1) 114 d, err := ioutil.TempDir(TempDir, name) 115 if err != nil { 116 a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err) 117 } 118 a.DataDir = d 119 a.Config.DataDir = d 120 a.Config.NomadConfig.DataDir = d 121 } 122 123 i := 10 124 125 RETRY: 126 for ; i >= 0; i-- { 127 a.pickRandomPorts(a.Config) 128 if a.Config.NodeName == "" { 129 a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC) 130 } 131 132 // write the keyring 133 if a.Key != "" { 134 writeKey := func(key, filename string) { 135 path := filepath.Join(a.Config.DataDir, filename) 136 if err := initKeyring(path, key); err != nil { 137 a.T.Fatalf("Error creating keyring %s: %s", path, err) 138 } 139 } 140 writeKey(a.Key, serfKeyring) 141 } 142 143 // we need the err var in the next exit condition 144 agent, err := a.start() 145 if err == nil { 146 a.Agent = agent 147 break 148 } else if i == 0 { 149 a.T.Fatalf("%s: Error starting agent: %v", a.Name, err) 150 } 151 152 if agent != nil { 153 agent.Shutdown() 154 } 155 wait := time.Duration(rand.Int31n(2000)) * time.Millisecond 156 a.T.Logf("%s: retrying in %v", a.Name, wait) 157 time.Sleep(wait) 158 159 // Clean out the data dir if we are responsible for it before we 160 // try again, since the old ports may have gotten written to 161 // the data dir, such as in the Raft configuration. 162 if a.DataDir != "" { 163 if err := os.RemoveAll(a.DataDir); err != nil { 164 a.T.Fatalf("%s: Error resetting data dir: %v", a.Name, err) 165 } 166 } 167 } 168 169 failed := false 170 if a.Config.NomadConfig.Bootstrap && a.Config.Server.Enabled { 171 testutil.WaitForResult(func() (bool, error) { 172 args := &structs.GenericRequest{} 173 var leader string 174 err := a.RPC("Status.Leader", args, &leader) 175 return leader != "", err 176 }, func(err error) { 177 a.T.Logf("failed to find leader: %v", err) 178 failed = true 179 }) 180 } else { 181 testutil.WaitForResult(func() (bool, error) { 182 req, _ := http.NewRequest("GET", "/v1/agent/self", nil) 183 resp := httptest.NewRecorder() 184 _, err := a.Server.AgentSelfRequest(resp, req) 185 return err == nil && resp.Code == 200, err 186 }, func(err error) { 187 a.T.Logf("failed to find leader: %v", err) 188 failed = true 189 }) 190 } 191 if failed { 192 a.Agent.Shutdown() 193 goto RETRY 194 } 195 196 // Check if ACLs enabled. Use special value of PolicyTTL 0s 197 // to do a bypass of this step. This is so we can test bootstrap 198 // without having to pass down a special flag. 199 if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 { 200 a.RootToken = mock.ACLManagementToken() 201 state := a.Agent.server.State() 202 if err := state.BootstrapACLTokens(1, 0, a.RootToken); err != nil { 203 a.T.Fatalf("token bootstrap failed: %v", err) 204 } 205 } 206 return a 207 } 208 209 func (a *TestAgent) start() (*Agent, error) { 210 if a.LogOutput == nil { 211 a.LogOutput = testlog.NewWriter(a.T) 212 } 213 214 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 215 metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm) 216 217 if inm == nil { 218 return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization") 219 } 220 221 logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{ 222 Name: "agent", 223 Level: hclog.LevelFromString(a.Config.LogLevel), 224 Output: a.LogOutput, 225 JSONFormat: a.Config.LogJson, 226 }) 227 228 agent, err := NewAgent(a.Config, logger, a.LogOutput, inm) 229 if err != nil { 230 return nil, err 231 } 232 233 // Setup the HTTP server 234 http, err := NewHTTPServer(agent, a.Config) 235 if err != nil { 236 return agent, err 237 } 238 239 a.Server = http 240 return agent, nil 241 } 242 243 // Shutdown stops the agent and removes the data directory if it is 244 // managed by the test agent. 245 func (a *TestAgent) Shutdown() error { 246 defer freeport.Return(a.ports) 247 248 defer func() { 249 if a.DataDir != "" { 250 os.RemoveAll(a.DataDir) 251 } 252 }() 253 254 // shutdown agent before endpoints 255 ch := make(chan error, 1) 256 go func() { 257 defer close(ch) 258 a.Server.Shutdown() 259 ch <- a.Agent.Shutdown() 260 }() 261 262 select { 263 case err := <-ch: 264 return err 265 case <-time.After(1 * time.Minute): 266 return fmt.Errorf("timed out while shutting down test agent") 267 } 268 } 269 270 func (a *TestAgent) HTTPAddr() string { 271 if a.Server == nil { 272 return "" 273 } 274 proto := "http://" 275 if a.Config.TLSConfig != nil && a.Config.TLSConfig.EnableHTTP { 276 proto = "https://" 277 } 278 return proto + a.Server.Addr 279 } 280 281 func (a *TestAgent) Client() *api.Client { 282 conf := api.DefaultConfig() 283 conf.Address = a.HTTPAddr() 284 c, err := api.NewClient(conf) 285 if err != nil { 286 a.T.Fatalf("Error creating Nomad API client: %s", err) 287 } 288 return c 289 } 290 291 // pickRandomPorts selects random ports from fixed size random blocks of 292 // ports. This does not eliminate the chance for port conflict but 293 // reduces it significantly with little overhead. Furthermore, asking 294 // the kernel for a random port by binding to port 0 prolongs the test 295 // execution (in our case +20sec) while also not fully eliminating the 296 // chance of port conflicts for concurrently executed test binaries. 297 // Instead of relying on one set of ports to be sufficient we retry 298 // starting the agent with different ports on port conflict. 299 func (a *TestAgent) pickRandomPorts(c *Config) { 300 ports := freeport.MustTake(3) 301 a.ports = append(a.ports, ports...) 302 303 c.Ports.HTTP = ports[0] 304 c.Ports.RPC = ports[1] 305 c.Ports.Serf = ports[2] 306 307 // Clear out the advertise addresses such that through retries we 308 // re-normalize the addresses correctly instead of using the values from the 309 // last port selection that had a port conflict. 310 if c.AdvertiseAddrs != nil { 311 c.AdvertiseAddrs.HTTP = "" 312 c.AdvertiseAddrs.RPC = "" 313 c.AdvertiseAddrs.Serf = "" 314 } 315 316 if err := c.normalizeAddrs(); err != nil { 317 a.T.Fatalf("error normalizing config: %v", err) 318 } 319 } 320 321 // TestConfig returns a unique default configuration for testing an 322 // agent. 323 func (a *TestAgent) config() *Config { 324 conf := DevConfig(nil) 325 326 // Customize the server configuration 327 config := nomad.DefaultConfig() 328 conf.NomadConfig = config 329 330 // Set the name 331 conf.NodeName = a.Name 332 333 // Bind and set ports 334 conf.BindAddr = "127.0.0.1" 335 336 conf.Consul = sconfig.DefaultConsulConfig() 337 conf.Vault.Enabled = new(bool) 338 339 // Tighten the Serf timing 340 config.SerfConfig.MemberlistConfig.SuspicionMult = 2 341 config.SerfConfig.MemberlistConfig.RetransmitMult = 2 342 config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond 343 config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond 344 config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond 345 346 // Tighten the Raft timing 347 config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond 348 config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond 349 config.RaftConfig.ElectionTimeout = 40 * time.Millisecond 350 config.RaftConfig.StartAsLeader = true 351 config.RaftTimeout = 500 * time.Millisecond 352 353 // Tighten the autopilot timing 354 config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond 355 config.ServerHealthInterval = 50 * time.Millisecond 356 config.AutopilotInterval = 100 * time.Millisecond 357 358 // Bootstrap ourselves 359 config.Bootstrap = true 360 config.BootstrapExpect = 1 361 362 // Tighten the fingerprinter timeouts 363 if conf.Client.Options == nil { 364 conf.Client.Options = make(map[string]string) 365 } 366 conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true" 367 368 if a.ConfigCallback != nil { 369 a.ConfigCallback(conf) 370 } 371 372 return conf 373 }