github.com/quite/nomad@v0.8.6/command/agent/testagent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "math/rand" 8 "net/http" 9 "net/http/httptest" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "time" 15 16 "github.com/mitchellh/go-testing-interface" 17 18 metrics "github.com/armon/go-metrics" 19 "github.com/hashicorp/consul/lib/freeport" 20 "github.com/hashicorp/nomad/api" 21 "github.com/hashicorp/nomad/client/fingerprint" 22 "github.com/hashicorp/nomad/helper/testlog" 23 "github.com/hashicorp/nomad/nomad" 24 "github.com/hashicorp/nomad/nomad/mock" 25 "github.com/hashicorp/nomad/nomad/structs" 26 sconfig "github.com/hashicorp/nomad/nomad/structs/config" 27 "github.com/hashicorp/nomad/testutil" 28 ) 29 30 func init() { 31 rand.Seed(time.Now().UnixNano()) // seed random number generator 32 } 33 34 // TempDir defines the base dir for temporary directories. 35 var TempDir = os.TempDir() 36 37 // TestAgent encapsulates an Agent with a default configuration and startup 38 // procedure suitable for testing. It manages a temporary data directory which 39 // is removed after shutdown. 40 type TestAgent struct { 41 // T is the testing object 42 T testing.T 43 44 // Name is an optional name of the agent. 45 Name string 46 47 // ConfigCallback is an optional callback that allows modification of the 48 // configuration before the agent is started. 49 ConfigCallback func(*Config) 50 51 // Config is the agent configuration. If Config is nil then 52 // TestConfig() is used. If Config.DataDir is set then it is 53 // the callers responsibility to clean up the data directory. 54 // Otherwise, a temporary data directory is created and removed 55 // when Shutdown() is called. 56 Config *Config 57 58 // LogOutput is the sink for the logs. If nil, logs are written 59 // to os.Stderr. 60 LogOutput io.Writer 61 62 // DataDir is the data directory which is used when Config.DataDir 63 // is not set. It is created automatically and removed when 64 // Shutdown() is called. 65 DataDir string 66 67 // Key is the optional encryption key for the keyring. 68 Key string 69 70 // Server is a reference to the started HTTP endpoint. 71 // It is valid after Start(). 72 Server *HTTPServer 73 74 // Agent is the embedded Nomad agent. 75 // It is valid after Start(). 76 *Agent 77 78 // RootToken is auto-bootstrapped if ACLs are enabled 79 RootToken *structs.ACLToken 80 } 81 82 // NewTestAgent returns a started agent with the given name and 83 // configuration. The caller should call Shutdown() to stop the agent and 84 // remove temporary directories. 85 func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent { 86 a := &TestAgent{ 87 T: t, 88 Name: name, 89 ConfigCallback: configCallback, 90 } 91 92 a.Start() 93 return a 94 } 95 96 // Start starts a test agent. 97 func (a *TestAgent) Start() *TestAgent { 98 if a.Agent != nil { 99 a.T.Fatalf("TestAgent already started") 100 } 101 if a.Config == nil { 102 a.Config = a.config() 103 } 104 if a.Config.DataDir == "" { 105 name := "agent" 106 if a.Name != "" { 107 name = a.Name + "-agent" 108 } 109 name = strings.Replace(name, "/", "_", -1) 110 d, err := ioutil.TempDir(TempDir, name) 111 if err != nil { 112 a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err) 113 } 114 a.DataDir = d 115 a.Config.DataDir = d 116 a.Config.NomadConfig.DataDir = d 117 } 118 119 i := 10 120 121 RETRY: 122 for ; i >= 0; i-- { 123 a.pickRandomPorts(a.Config) 124 if a.Config.NodeName == "" { 125 a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC) 126 } 127 128 // write the keyring 129 if a.Key != "" { 130 writeKey := func(key, filename string) { 131 path := filepath.Join(a.Config.DataDir, filename) 132 if err := initKeyring(path, key); err != nil { 133 a.T.Fatalf("Error creating keyring %s: %s", path, err) 134 } 135 } 136 writeKey(a.Key, serfKeyring) 137 } 138 139 // we need the err var in the next exit condition 140 if agent, err := a.start(); err == nil { 141 a.Agent = agent 142 break 143 } else if i == 0 { 144 a.T.Logf("%s: Error starting agent: %v", a.Name, err) 145 runtime.Goexit() 146 } else { 147 if agent != nil { 148 agent.Shutdown() 149 } 150 wait := time.Duration(rand.Int31n(2000)) * time.Millisecond 151 a.T.Logf("%s: retrying in %v", a.Name, wait) 152 time.Sleep(wait) 153 } 154 155 // Clean out the data dir if we are responsible for it before we 156 // try again, since the old ports may have gotten written to 157 // the data dir, such as in the Raft configuration. 158 if a.DataDir != "" { 159 if err := os.RemoveAll(a.DataDir); err != nil { 160 a.T.Logf("%s: Error resetting data dir: %v", a.Name, err) 161 runtime.Goexit() 162 } 163 } 164 } 165 166 failed := false 167 if a.Config.NomadConfig.Bootstrap && a.Config.Server.Enabled { 168 testutil.WaitForResult(func() (bool, error) { 169 args := &structs.GenericRequest{} 170 var leader string 171 err := a.RPC("Status.Leader", args, &leader) 172 return leader != "", err 173 }, func(err error) { 174 a.T.Logf("failed to find leader: %v", err) 175 failed = true 176 }) 177 } else { 178 testutil.WaitForResult(func() (bool, error) { 179 req, _ := http.NewRequest("GET", "/v1/agent/self", nil) 180 resp := httptest.NewRecorder() 181 _, err := a.Server.AgentSelfRequest(resp, req) 182 return err == nil && resp.Code == 200, err 183 }, func(err error) { 184 a.T.Logf("failed to find leader: %v", err) 185 failed = true 186 }) 187 } 188 if failed { 189 a.Agent.Shutdown() 190 goto RETRY 191 } 192 193 // Check if ACLs enabled. Use special value of PolicyTTL 0s 194 // to do a bypass of this step. This is so we can test bootstrap 195 // without having to pass down a special flag. 196 if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 { 197 a.RootToken = mock.ACLManagementToken() 198 state := a.Agent.server.State() 199 if err := state.BootstrapACLTokens(1, 0, a.RootToken); err != nil { 200 a.T.Fatalf("token bootstrap failed: %v", err) 201 } 202 } 203 return a 204 } 205 206 func (a *TestAgent) start() (*Agent, error) { 207 if a.LogOutput == nil { 208 a.LogOutput = testlog.NewWriter(a.T) 209 } 210 211 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 212 metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm) 213 214 if inm == nil { 215 return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization") 216 } 217 218 agent, err := NewAgent(a.Config, a.LogOutput, inm) 219 if err != nil { 220 return nil, err 221 } 222 223 // Setup the HTTP server 224 http, err := NewHTTPServer(agent, a.Config) 225 if err != nil { 226 return agent, err 227 } 228 229 a.Server = http 230 return agent, nil 231 } 232 233 // Shutdown stops the agent and removes the data directory if it is 234 // managed by the test agent. 235 func (a *TestAgent) Shutdown() error { 236 defer func() { 237 if a.DataDir != "" { 238 os.RemoveAll(a.DataDir) 239 } 240 }() 241 242 // shutdown agent before endpoints 243 a.Server.Shutdown() 244 return a.Agent.Shutdown() 245 } 246 247 func (a *TestAgent) HTTPAddr() string { 248 if a.Server == nil { 249 return "" 250 } 251 return "http://" + a.Server.Addr 252 } 253 254 func (a *TestAgent) Client() *api.Client { 255 conf := api.DefaultConfig() 256 conf.Address = a.HTTPAddr() 257 c, err := api.NewClient(conf) 258 if err != nil { 259 a.T.Fatalf("Error creating Nomad API client: %s", err) 260 } 261 return c 262 } 263 264 // pickRandomPorts selects random ports from fixed size random blocks of 265 // ports. This does not eliminate the chance for port conflict but 266 // reduces it significantly with little overhead. Furthermore, asking 267 // the kernel for a random port by binding to port 0 prolongs the test 268 // execution (in our case +20sec) while also not fully eliminating the 269 // chance of port conflicts for concurrently executed test binaries. 270 // Instead of relying on one set of ports to be sufficient we retry 271 // starting the agent with different ports on port conflict. 272 func (a *TestAgent) pickRandomPorts(c *Config) { 273 ports := freeport.GetT(a.T, 3) 274 c.Ports.HTTP = ports[0] 275 c.Ports.RPC = ports[1] 276 c.Ports.Serf = ports[2] 277 278 // Clear out the advertise addresses such that through retries we 279 // re-normalize the addresses correctly instead of using the values from the 280 // last port selection that had a port conflict. 281 if c.AdvertiseAddrs != nil { 282 c.AdvertiseAddrs.HTTP = "" 283 c.AdvertiseAddrs.RPC = "" 284 c.AdvertiseAddrs.Serf = "" 285 } 286 287 if err := c.normalizeAddrs(); err != nil { 288 a.T.Fatalf("error normalizing config: %v", err) 289 } 290 } 291 292 // TestConfig returns a unique default configuration for testing an 293 // agent. 294 func (a *TestAgent) config() *Config { 295 conf := DevConfig() 296 297 // Customize the server configuration 298 config := nomad.DefaultConfig() 299 conf.NomadConfig = config 300 301 // Set the name 302 conf.NodeName = a.Name 303 304 // Bind and set ports 305 conf.BindAddr = "127.0.0.1" 306 307 conf.Consul = sconfig.DefaultConsulConfig() 308 conf.Vault.Enabled = new(bool) 309 310 // Tighten the Serf timing 311 config.SerfConfig.MemberlistConfig.SuspicionMult = 2 312 config.SerfConfig.MemberlistConfig.RetransmitMult = 2 313 config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond 314 config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond 315 config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond 316 317 // Tighten the Raft timing 318 config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond 319 config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond 320 config.RaftConfig.ElectionTimeout = 40 * time.Millisecond 321 config.RaftConfig.StartAsLeader = true 322 config.RaftTimeout = 500 * time.Millisecond 323 324 // Tighten the autopilot timing 325 config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond 326 config.ServerHealthInterval = 50 * time.Millisecond 327 config.AutopilotInterval = 100 * time.Millisecond 328 329 // Bootstrap ourselves 330 config.Bootstrap = true 331 config.BootstrapExpect = 1 332 333 // Tighten the fingerprinter timeouts 334 if conf.Client.Options == nil { 335 conf.Client.Options = make(map[string]string) 336 } 337 conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true" 338 339 if a.ConfigCallback != nil { 340 a.ConfigCallback(conf) 341 } 342 343 return conf 344 }