github.com/manicqin/nomad@v0.9.5/command/agent/testagent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "math/rand" 8 "net/http" 9 "net/http/httptest" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "time" 15 16 testing "github.com/mitchellh/go-testing-interface" 17 18 metrics "github.com/armon/go-metrics" 19 "github.com/hashicorp/go-hclog" 20 "github.com/hashicorp/nomad/api" 21 "github.com/hashicorp/nomad/client/fingerprint" 22 "github.com/hashicorp/nomad/helper/freeport" 23 "github.com/hashicorp/nomad/helper/testlog" 24 "github.com/hashicorp/nomad/nomad" 25 "github.com/hashicorp/nomad/nomad/mock" 26 "github.com/hashicorp/nomad/nomad/structs" 27 sconfig "github.com/hashicorp/nomad/nomad/structs/config" 28 "github.com/hashicorp/nomad/testutil" 29 ) 30 31 func init() { 32 rand.Seed(time.Now().UnixNano()) // seed random number generator 33 } 34 35 // TempDir defines the base dir for temporary directories. 36 var TempDir = os.TempDir() 37 38 // TestAgent encapsulates an Agent with a default configuration and startup 39 // procedure suitable for testing. It manages a temporary data directory which 40 // is removed after shutdown. 41 type TestAgent struct { 42 // T is the testing object 43 T testing.T 44 45 // Name is an optional name of the agent. 46 Name string 47 48 // ConfigCallback is an optional callback that allows modification of the 49 // configuration before the agent is started. 50 ConfigCallback func(*Config) 51 52 // Config is the agent configuration. If Config is nil then 53 // TestConfig() is used. If Config.DataDir is set then it is 54 // the callers responsibility to clean up the data directory. 55 // Otherwise, a temporary data directory is created and removed 56 // when Shutdown() is called. 57 Config *Config 58 59 // LogOutput is the sink for the logs. If nil, logs are written 60 // to os.Stderr. 61 LogOutput io.Writer 62 63 // DataDir is the data directory which is used when Config.DataDir 64 // is not set. It is created automatically and removed when 65 // Shutdown() is called. 66 DataDir string 67 68 // Key is the optional encryption key for the keyring. 69 Key string 70 71 // Server is a reference to the started HTTP endpoint. 72 // It is valid after Start(). 73 Server *HTTPServer 74 75 // Agent is the embedded Nomad agent. 76 // It is valid after Start(). 77 *Agent 78 79 // RootToken is auto-bootstrapped if ACLs are enabled 80 RootToken *structs.ACLToken 81 82 // ports that are reserved through freeport that must be returned at 83 // the end of a test, done when Shutdown() is called. 84 ports []int 85 } 86 87 // NewTestAgent returns a started agent with the given name and 88 // configuration. The caller should call Shutdown() to stop the agent and 89 // remove temporary directories. 90 func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent { 91 a := &TestAgent{ 92 T: t, 93 Name: name, 94 ConfigCallback: configCallback, 95 } 96 97 a.Start() 98 return a 99 } 100 101 // Start starts a test agent. 102 func (a *TestAgent) Start() *TestAgent { 103 if a.Agent != nil { 104 a.T.Fatalf("TestAgent already started") 105 } 106 if a.Config == nil { 107 a.Config = a.config() 108 } 109 if a.Config.DataDir == "" { 110 name := "agent" 111 if a.Name != "" { 112 name = a.Name + "-agent" 113 } 114 name = strings.Replace(name, "/", "_", -1) 115 d, err := ioutil.TempDir(TempDir, name) 116 if err != nil { 117 a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err) 118 } 119 a.DataDir = d 120 a.Config.DataDir = d 121 a.Config.NomadConfig.DataDir = d 122 } 123 124 i := 10 125 126 RETRY: 127 for ; i >= 0; i-- { 128 a.pickRandomPorts(a.Config) 129 if a.Config.NodeName == "" { 130 a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC) 131 } 132 133 // write the keyring 134 if a.Key != "" { 135 writeKey := func(key, filename string) { 136 path := filepath.Join(a.Config.DataDir, filename) 137 if err := initKeyring(path, key); err != nil { 138 a.T.Fatalf("Error creating keyring %s: %s", path, err) 139 } 140 } 141 writeKey(a.Key, serfKeyring) 142 } 143 144 // we need the err var in the next exit condition 145 if agent, err := a.start(); err == nil { 146 a.Agent = agent 147 break 148 } else if i == 0 { 149 a.T.Logf("%s: Error starting agent: %v", a.Name, err) 150 runtime.Goexit() 151 } else { 152 if agent != nil { 153 agent.Shutdown() 154 } 155 wait := time.Duration(rand.Int31n(2000)) * time.Millisecond 156 a.T.Logf("%s: retrying in %v", a.Name, wait) 157 time.Sleep(wait) 158 } 159 160 // Clean out the data dir if we are responsible for it before we 161 // try again, since the old ports may have gotten written to 162 // the data dir, such as in the Raft configuration. 163 if a.DataDir != "" { 164 if err := os.RemoveAll(a.DataDir); err != nil { 165 a.T.Logf("%s: Error resetting data dir: %v", a.Name, err) 166 runtime.Goexit() 167 } 168 } 169 } 170 171 failed := false 172 if a.Config.NomadConfig.Bootstrap && a.Config.Server.Enabled { 173 testutil.WaitForResult(func() (bool, error) { 174 args := &structs.GenericRequest{} 175 var leader string 176 err := a.RPC("Status.Leader", args, &leader) 177 return leader != "", err 178 }, func(err error) { 179 a.T.Logf("failed to find leader: %v", err) 180 failed = true 181 }) 182 } else { 183 testutil.WaitForResult(func() (bool, error) { 184 req, _ := http.NewRequest("GET", "/v1/agent/self", nil) 185 resp := httptest.NewRecorder() 186 _, err := a.Server.AgentSelfRequest(resp, req) 187 return err == nil && resp.Code == 200, err 188 }, func(err error) { 189 a.T.Logf("failed to find leader: %v", err) 190 failed = true 191 }) 192 } 193 if failed { 194 a.Agent.Shutdown() 195 goto RETRY 196 } 197 198 // Check if ACLs enabled. Use special value of PolicyTTL 0s 199 // to do a bypass of this step. This is so we can test bootstrap 200 // without having to pass down a special flag. 201 if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 { 202 a.RootToken = mock.ACLManagementToken() 203 state := a.Agent.server.State() 204 if err := state.BootstrapACLTokens(1, 0, a.RootToken); err != nil { 205 a.T.Fatalf("token bootstrap failed: %v", err) 206 } 207 } 208 return a 209 } 210 211 func (a *TestAgent) start() (*Agent, error) { 212 if a.LogOutput == nil { 213 a.LogOutput = testlog.NewWriter(a.T) 214 } 215 216 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 217 metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm) 218 219 if inm == nil { 220 return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization") 221 } 222 223 logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{ 224 Name: "agent", 225 Level: hclog.LevelFromString(a.Config.LogLevel), 226 Output: a.LogOutput, 227 JSONFormat: a.Config.LogJson, 228 }) 229 230 agent, err := NewAgent(a.Config, logger, a.LogOutput, inm) 231 if err != nil { 232 return nil, err 233 } 234 235 // Setup the HTTP server 236 http, err := NewHTTPServer(agent, a.Config) 237 if err != nil { 238 return agent, err 239 } 240 241 a.Server = http 242 return agent, nil 243 } 244 245 // Shutdown stops the agent and removes the data directory if it is 246 // managed by the test agent. 247 func (a *TestAgent) Shutdown() error { 248 defer freeport.Return(a.ports) 249 250 defer func() { 251 if a.DataDir != "" { 252 os.RemoveAll(a.DataDir) 253 } 254 }() 255 256 // shutdown agent before endpoints 257 ch := make(chan error, 1) 258 go func() { 259 defer close(ch) 260 a.Server.Shutdown() 261 ch <- a.Agent.Shutdown() 262 }() 263 264 select { 265 case err := <-ch: 266 return err 267 case <-time.After(1 * time.Minute): 268 return fmt.Errorf("timed out while shutting down test agent") 269 } 270 } 271 272 func (a *TestAgent) HTTPAddr() string { 273 if a.Server == nil { 274 return "" 275 } 276 return "http://" + a.Server.Addr 277 } 278 279 func (a *TestAgent) Client() *api.Client { 280 conf := api.DefaultConfig() 281 conf.Address = a.HTTPAddr() 282 c, err := api.NewClient(conf) 283 if err != nil { 284 a.T.Fatalf("Error creating Nomad API client: %s", err) 285 } 286 return c 287 } 288 289 // pickRandomPorts selects random ports from fixed size random blocks of 290 // ports. This does not eliminate the chance for port conflict but 291 // reduces it significantly with little overhead. Furthermore, asking 292 // the kernel for a random port by binding to port 0 prolongs the test 293 // execution (in our case +20sec) while also not fully eliminating the 294 // chance of port conflicts for concurrently executed test binaries. 295 // Instead of relying on one set of ports to be sufficient we retry 296 // starting the agent with different ports on port conflict. 297 func (a *TestAgent) pickRandomPorts(c *Config) { 298 ports := freeport.MustTake(3) 299 a.ports = append(a.ports, ports...) 300 301 c.Ports.HTTP = ports[0] 302 c.Ports.RPC = ports[1] 303 c.Ports.Serf = ports[2] 304 305 // Clear out the advertise addresses such that through retries we 306 // re-normalize the addresses correctly instead of using the values from the 307 // last port selection that had a port conflict. 308 if c.AdvertiseAddrs != nil { 309 c.AdvertiseAddrs.HTTP = "" 310 c.AdvertiseAddrs.RPC = "" 311 c.AdvertiseAddrs.Serf = "" 312 } 313 314 if err := c.normalizeAddrs(); err != nil { 315 a.T.Fatalf("error normalizing config: %v", err) 316 } 317 } 318 319 // TestConfig returns a unique default configuration for testing an 320 // agent. 321 func (a *TestAgent) config() *Config { 322 conf := DevConfig(nil) 323 324 // Customize the server configuration 325 config := nomad.DefaultConfig() 326 conf.NomadConfig = config 327 328 // Set the name 329 conf.NodeName = a.Name 330 331 // Bind and set ports 332 conf.BindAddr = "127.0.0.1" 333 334 conf.Consul = sconfig.DefaultConsulConfig() 335 conf.Vault.Enabled = new(bool) 336 337 // Tighten the Serf timing 338 config.SerfConfig.MemberlistConfig.SuspicionMult = 2 339 config.SerfConfig.MemberlistConfig.RetransmitMult = 2 340 config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond 341 config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond 342 config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond 343 344 // Tighten the Raft timing 345 config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond 346 config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond 347 config.RaftConfig.ElectionTimeout = 40 * time.Millisecond 348 config.RaftConfig.StartAsLeader = true 349 config.RaftTimeout = 500 * time.Millisecond 350 351 // Tighten the autopilot timing 352 config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond 353 config.ServerHealthInterval = 50 * time.Millisecond 354 config.AutopilotInterval = 100 * time.Millisecond 355 356 // Bootstrap ourselves 357 config.Bootstrap = true 358 config.BootstrapExpect = 1 359 360 // Tighten the fingerprinter timeouts 361 if conf.Client.Options == nil { 362 conf.Client.Options = make(map[string]string) 363 } 364 conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true" 365 366 if a.ConfigCallback != nil { 367 a.ConfigCallback(conf) 368 } 369 370 return conf 371 }