github.com/smithx10/nomad@v0.9.1-rc1/command/agent/testagent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "math/rand" 8 "net/http" 9 "net/http/httptest" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "time" 15 16 testing "github.com/mitchellh/go-testing-interface" 17 18 metrics "github.com/armon/go-metrics" 19 "github.com/hashicorp/consul/lib/freeport" 20 "github.com/hashicorp/go-hclog" 21 "github.com/hashicorp/nomad/api" 22 "github.com/hashicorp/nomad/client/fingerprint" 23 "github.com/hashicorp/nomad/helper/testlog" 24 "github.com/hashicorp/nomad/nomad" 25 "github.com/hashicorp/nomad/nomad/mock" 26 "github.com/hashicorp/nomad/nomad/structs" 27 sconfig "github.com/hashicorp/nomad/nomad/structs/config" 28 "github.com/hashicorp/nomad/testutil" 29 ) 30 31 func init() { 32 rand.Seed(time.Now().UnixNano()) // seed random number generator 33 } 34 35 // TempDir defines the base dir for temporary directories. 36 var TempDir = os.TempDir() 37 38 // TestAgent encapsulates an Agent with a default configuration and startup 39 // procedure suitable for testing. It manages a temporary data directory which 40 // is removed after shutdown. 41 type TestAgent struct { 42 // T is the testing object 43 T testing.T 44 45 // Name is an optional name of the agent. 46 Name string 47 48 // ConfigCallback is an optional callback that allows modification of the 49 // configuration before the agent is started. 50 ConfigCallback func(*Config) 51 52 // Config is the agent configuration. If Config is nil then 53 // TestConfig() is used. If Config.DataDir is set then it is 54 // the callers responsibility to clean up the data directory. 55 // Otherwise, a temporary data directory is created and removed 56 // when Shutdown() is called. 57 Config *Config 58 59 // LogOutput is the sink for the logs. If nil, logs are written 60 // to os.Stderr. 61 LogOutput io.Writer 62 63 // DataDir is the data directory which is used when Config.DataDir 64 // is not set. It is created automatically and removed when 65 // Shutdown() is called. 66 DataDir string 67 68 // Key is the optional encryption key for the keyring. 69 Key string 70 71 // Server is a reference to the started HTTP endpoint. 72 // It is valid after Start(). 73 Server *HTTPServer 74 75 // Agent is the embedded Nomad agent. 76 // It is valid after Start(). 77 *Agent 78 79 // RootToken is auto-bootstrapped if ACLs are enabled 80 RootToken *structs.ACLToken 81 } 82 83 // NewTestAgent returns a started agent with the given name and 84 // configuration. The caller should call Shutdown() to stop the agent and 85 // remove temporary directories. 86 func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent { 87 a := &TestAgent{ 88 T: t, 89 Name: name, 90 ConfigCallback: configCallback, 91 } 92 93 a.Start() 94 return a 95 } 96 97 // Start starts a test agent. 98 func (a *TestAgent) Start() *TestAgent { 99 if a.Agent != nil { 100 a.T.Fatalf("TestAgent already started") 101 } 102 if a.Config == nil { 103 a.Config = a.config() 104 } 105 if a.Config.DataDir == "" { 106 name := "agent" 107 if a.Name != "" { 108 name = a.Name + "-agent" 109 } 110 name = strings.Replace(name, "/", "_", -1) 111 d, err := ioutil.TempDir(TempDir, name) 112 if err != nil { 113 a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err) 114 } 115 a.DataDir = d 116 a.Config.DataDir = d 117 a.Config.NomadConfig.DataDir = d 118 } 119 120 i := 10 121 122 RETRY: 123 for ; i >= 0; i-- { 124 a.pickRandomPorts(a.Config) 125 if a.Config.NodeName == "" { 126 a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC) 127 } 128 129 // write the keyring 130 if a.Key != "" { 131 writeKey := func(key, filename string) { 132 path := filepath.Join(a.Config.DataDir, filename) 133 if err := initKeyring(path, key); err != nil { 134 a.T.Fatalf("Error creating keyring %s: %s", path, err) 135 } 136 } 137 writeKey(a.Key, serfKeyring) 138 } 139 140 // we need the err var in the next exit condition 141 if agent, err := a.start(); err == nil { 142 a.Agent = agent 143 break 144 } else if i == 0 { 145 a.T.Logf("%s: Error starting agent: %v", a.Name, err) 146 runtime.Goexit() 147 } else { 148 if agent != nil { 149 agent.Shutdown() 150 } 151 wait := time.Duration(rand.Int31n(2000)) * time.Millisecond 152 a.T.Logf("%s: retrying in %v", a.Name, wait) 153 time.Sleep(wait) 154 } 155 156 // Clean out the data dir if we are responsible for it before we 157 // try again, since the old ports may have gotten written to 158 // the data dir, such as in the Raft configuration. 159 if a.DataDir != "" { 160 if err := os.RemoveAll(a.DataDir); err != nil { 161 a.T.Logf("%s: Error resetting data dir: %v", a.Name, err) 162 runtime.Goexit() 163 } 164 } 165 } 166 167 failed := false 168 if a.Config.NomadConfig.Bootstrap && a.Config.Server.Enabled { 169 testutil.WaitForResult(func() (bool, error) { 170 args := &structs.GenericRequest{} 171 var leader string 172 err := a.RPC("Status.Leader", args, &leader) 173 return leader != "", err 174 }, func(err error) { 175 a.T.Logf("failed to find leader: %v", err) 176 failed = true 177 }) 178 } else { 179 testutil.WaitForResult(func() (bool, error) { 180 req, _ := http.NewRequest("GET", "/v1/agent/self", nil) 181 resp := httptest.NewRecorder() 182 _, err := a.Server.AgentSelfRequest(resp, req) 183 return err == nil && resp.Code == 200, err 184 }, func(err error) { 185 a.T.Logf("failed to find leader: %v", err) 186 failed = true 187 }) 188 } 189 if failed { 190 a.Agent.Shutdown() 191 goto RETRY 192 } 193 194 // Check if ACLs enabled. Use special value of PolicyTTL 0s 195 // to do a bypass of this step. This is so we can test bootstrap 196 // without having to pass down a special flag. 197 if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 { 198 a.RootToken = mock.ACLManagementToken() 199 state := a.Agent.server.State() 200 if err := state.BootstrapACLTokens(1, 0, a.RootToken); err != nil { 201 a.T.Fatalf("token bootstrap failed: %v", err) 202 } 203 } 204 return a 205 } 206 207 func (a *TestAgent) start() (*Agent, error) { 208 if a.LogOutput == nil { 209 a.LogOutput = testlog.NewWriter(a.T) 210 } 211 212 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 213 metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm) 214 215 if inm == nil { 216 return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization") 217 } 218 219 logger := hclog.New(&hclog.LoggerOptions{ 220 Name: "agent", 221 Level: hclog.LevelFromString(a.Config.LogLevel), 222 Output: a.LogOutput, 223 JSONFormat: a.Config.LogJson, 224 }) 225 226 agent, err := NewAgent(a.Config, logger, a.LogOutput, inm) 227 if err != nil { 228 return nil, err 229 } 230 231 // Setup the HTTP server 232 http, err := NewHTTPServer(agent, a.Config) 233 if err != nil { 234 return agent, err 235 } 236 237 a.Server = http 238 return agent, nil 239 } 240 241 // Shutdown stops the agent and removes the data directory if it is 242 // managed by the test agent. 243 func (a *TestAgent) Shutdown() error { 244 defer func() { 245 if a.DataDir != "" { 246 os.RemoveAll(a.DataDir) 247 } 248 }() 249 250 // shutdown agent before endpoints 251 ch := make(chan error, 1) 252 go func() { 253 defer close(ch) 254 a.Server.Shutdown() 255 ch <- a.Agent.Shutdown() 256 }() 257 258 select { 259 case err := <-ch: 260 return err 261 case <-time.After(1 * time.Minute): 262 return fmt.Errorf("timed out while shutting down test agent") 263 } 264 } 265 266 func (a *TestAgent) HTTPAddr() string { 267 if a.Server == nil { 268 return "" 269 } 270 return "http://" + a.Server.Addr 271 } 272 273 func (a *TestAgent) Client() *api.Client { 274 conf := api.DefaultConfig() 275 conf.Address = a.HTTPAddr() 276 c, err := api.NewClient(conf) 277 if err != nil { 278 a.T.Fatalf("Error creating Nomad API client: %s", err) 279 } 280 return c 281 } 282 283 // pickRandomPorts selects random ports from fixed size random blocks of 284 // ports. This does not eliminate the chance for port conflict but 285 // reduces it significantly with little overhead. Furthermore, asking 286 // the kernel for a random port by binding to port 0 prolongs the test 287 // execution (in our case +20sec) while also not fully eliminating the 288 // chance of port conflicts for concurrently executed test binaries. 289 // Instead of relying on one set of ports to be sufficient we retry 290 // starting the agent with different ports on port conflict. 291 func (a *TestAgent) pickRandomPorts(c *Config) { 292 ports := freeport.GetT(a.T, 3) 293 c.Ports.HTTP = ports[0] 294 c.Ports.RPC = ports[1] 295 c.Ports.Serf = ports[2] 296 297 // Clear out the advertise addresses such that through retries we 298 // re-normalize the addresses correctly instead of using the values from the 299 // last port selection that had a port conflict. 300 if c.AdvertiseAddrs != nil { 301 c.AdvertiseAddrs.HTTP = "" 302 c.AdvertiseAddrs.RPC = "" 303 c.AdvertiseAddrs.Serf = "" 304 } 305 306 if err := c.normalizeAddrs(); err != nil { 307 a.T.Fatalf("error normalizing config: %v", err) 308 } 309 } 310 311 // TestConfig returns a unique default configuration for testing an 312 // agent. 313 func (a *TestAgent) config() *Config { 314 conf := DevConfig() 315 316 // Customize the server configuration 317 config := nomad.DefaultConfig() 318 conf.NomadConfig = config 319 320 // Set the name 321 conf.NodeName = a.Name 322 323 // Bind and set ports 324 conf.BindAddr = "127.0.0.1" 325 326 conf.Consul = sconfig.DefaultConsulConfig() 327 conf.Vault.Enabled = new(bool) 328 329 // Tighten the Serf timing 330 config.SerfConfig.MemberlistConfig.SuspicionMult = 2 331 config.SerfConfig.MemberlistConfig.RetransmitMult = 2 332 config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond 333 config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond 334 config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond 335 336 // Tighten the Raft timing 337 config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond 338 config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond 339 config.RaftConfig.ElectionTimeout = 40 * time.Millisecond 340 config.RaftConfig.StartAsLeader = true 341 config.RaftTimeout = 500 * time.Millisecond 342 343 // Tighten the autopilot timing 344 config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond 345 config.ServerHealthInterval = 50 * time.Millisecond 346 config.AutopilotInterval = 100 * time.Millisecond 347 348 // Bootstrap ourselves 349 config.Bootstrap = true 350 config.BootstrapExpect = 1 351 352 // Tighten the fingerprinter timeouts 353 if conf.Client.Options == nil { 354 conf.Client.Options = make(map[string]string) 355 } 356 conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true" 357 358 if a.ConfigCallback != nil { 359 a.ConfigCallback(conf) 360 } 361 362 return conf 363 }