github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/cmd/jujud/agent/machine_test.go

// Copyright 2012-2016 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package agent

import (
	"io/ioutil"
	"os"
	"path/filepath"
	"reflect"
	"runtime"
	"strings"
	"time"

	"github.com/juju/cmd"
	"github.com/juju/cmd/cmdtesting"
	"github.com/juju/errors"
	jc "github.com/juju/testing/checkers"
	"github.com/juju/utils"
	"github.com/juju/utils/arch"
	"github.com/juju/utils/series"
	"github.com/juju/utils/set"
	"github.com/juju/utils/ssh"
	sshtesting "github.com/juju/utils/ssh/testing"
	"github.com/juju/utils/symlink"
	"github.com/juju/version"
	gc "gopkg.in/check.v1"
	"gopkg.in/juju/charmrepo.v2-unstable"
	"gopkg.in/juju/names.v2"
	"gopkg.in/natefinch/lumberjack.v2"
	"gopkg.in/tomb.v1"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/api"
	"github.com/juju/juju/api/imagemetadata"
	apimachiner "github.com/juju/juju/api/machiner"
	"github.com/juju/juju/apiserver/params"
	"github.com/juju/juju/cert"
	"github.com/juju/juju/cmd/jujud/agent/model"
	"github.com/juju/juju/core/migration"
	"github.com/juju/juju/environs"
	envtesting "github.com/juju/juju/environs/testing"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/juju"
	"github.com/juju/juju/network"
	"github.com/juju/juju/provider/dummy"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/watcher"
	"github.com/juju/juju/status"
	"github.com/juju/juju/storage"
	coretesting "github.com/juju/juju/testing"
	"github.com/juju/juju/tools"
	jujuversion "github.com/juju/juju/version"
	"github.com/juju/juju/worker"
	"github.com/juju/juju/worker/authenticationworker"
	"github.com/juju/juju/worker/certupdater"
	"github.com/juju/juju/worker/dependency"
	"github.com/juju/juju/worker/diskmanager"
	"github.com/juju/juju/worker/instancepoller"
	"github.com/juju/juju/worker/machiner"
	"github.com/juju/juju/worker/migrationmaster"
	"github.com/juju/juju/worker/mongoupgrader"
	"github.com/juju/juju/worker/storageprovisioner"
	"github.com/juju/juju/worker/upgrader"
	"github.com/juju/juju/worker/workertest"
)

type MachineSuite struct {
	commonMachineSuite
}

var _ = gc.Suite(&MachineSuite{})

func (s *MachineSuite) SetUpTest(c *gc.C) {
	s.commonMachineSuite.SetUpTest(c)
	// Most of these tests normally finish sub-second on a fast machine.
	// If any given test hits a minute, we have almost certainly become
	// wedged, so dump the logs.
	coretesting.DumpTestLogsAfter(time.Minute, c, s)
}

func (s *MachineSuite) TestParseNonsense(c *gc.C) {
	for _, args := range [][]string{
		{},
		{"--machine-id", "-4004"},
	} {
		var agentConf agentConf
		err := ParseAgentCommand(&machineAgentCmd{agentInitializer: &agentConf}, args)
		c.Assert(err, gc.ErrorMatches, "--machine-id option must be set, and expects a non-negative integer")
	}
}

func (s *MachineSuite) TestParseUnknown(c *gc.C) {
	var agentConf agentConf
	a := &machineAgentCmd{agentInitializer: &agentConf}
	err := ParseAgentCommand(a, []string{"--machine-id", "42", "blistering barnacles"})
	c.Assert(err, gc.ErrorMatches, `unrecognized args: \["blistering barnacles"\]`)
}

func (s *MachineSuite) TestParseSuccess(c *gc.C) {
	create := func() (cmd.Command, AgentConf) {
		agentConf := agentConf{dataDir: s.DataDir()}
		a := NewMachineAgentCmd(
			nil,
			NewTestMachineAgentFactory(&agentConf, nil, c.MkDir()),
			&agentConf,
			&agentConf,
		)
		a.(*machineAgentCmd).logToStdErr = true

		return a, &agentConf
	}
	a := CheckAgentCommand(c, create, []string{"--machine-id", "42"})
	c.Assert(a.(*machineAgentCmd).machineId, gc.Equals, "42")
}

func (s *MachineSuite) TestRunInvalidMachineId(c *gc.C) {
	c.Skip("agents don't yet distinguish between temporary and permanent errors")
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	err := s.newAgent(c, m).Run(nil)
	c.Assert(err, gc.ErrorMatches, "some error")
}

func (s *MachineSuite) TestUseLumberjack(c *gc.C) {
	ctx := cmdtesting.Context(c)
	agentConf := FakeAgentConfig{}

	a := NewMachineAgentCmd(
		ctx,
		NewTestMachineAgentFactory(&agentConf, nil, c.MkDir()),
		agentConf,
		agentConf,
	)
	// little hack to set the data that Init expects to already be set
	a.(*machineAgentCmd).machineId = "42"

	err := a.Init(nil)
	c.Assert(err, gc.IsNil)

	l, ok := ctx.Stderr.(*lumberjack.Logger)
	c.Assert(ok, jc.IsTrue)
	c.Check(l.MaxAge, gc.Equals, 0)
	c.Check(l.MaxBackups, gc.Equals, 2)
	c.Check(l.Filename, gc.Equals, filepath.FromSlash("/var/log/juju/machine-42.log"))
	c.Check(l.MaxSize, gc.Equals, 300)
}

func (s *MachineSuite) TestDontUseLumberjack(c *gc.C) {
	ctx := cmdtesting.Context(c)
	agentConf := FakeAgentConfig{}

	a := NewMachineAgentCmd(
		ctx,
		NewTestMachineAgentFactory(&agentConf, nil, c.MkDir()),
		agentConf,
		agentConf,
	)
	// little hack to set the data that Init expects to already be set
	a.(*machineAgentCmd).machineId = "42"

	// set the value that normally gets set by the flag parsing
	a.(*machineAgentCmd).logToStdErr = true

	err := a.Init(nil)
	c.Assert(err, gc.IsNil)

	_, ok := ctx.Stderr.(*lumberjack.Logger)
	c.Assert(ok, jc.IsFalse)
}

func (s *MachineSuite) TestRunStop(c *gc.C) {
	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()
	err := a.Stop()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(<-done, jc.ErrorIsNil)
	c.Assert(charmrepo.CacheDir, gc.Equals, filepath.Join(ac.DataDir(), "charmcache"))
}

func (s *MachineSuite) TestWithDeadMachine(c *gc.C) {
	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
	err := m.EnsureDead()
	c.Assert(err, jc.ErrorIsNil)
	a := s.newAgent(c, m)
	err = runWithTimeout(a)
	c.Assert(err, jc.ErrorIsNil)

	_, err = os.Stat(ac.DataDir())
	c.Assert(err, jc.Satisfies, os.IsNotExist)
}

func (s *MachineSuite) TestWithRemovedMachine(c *gc.C) {
	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
	err := m.EnsureDead()
	c.Assert(err, jc.ErrorIsNil)
	err = m.Remove()
	c.Assert(err, jc.ErrorIsNil)
	a := s.newAgent(c, m)
	err = runWithTimeout(a)
	c.Assert(err, jc.ErrorIsNil)

	_, err = os.Stat(ac.DataDir())
	c.Assert(err, jc.Satisfies, os.IsNotExist)
}

func (s *MachineSuite) TestDyingMachine(c *gc.C) {
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()
	defer func() {
		c.Check(a.Stop(), jc.ErrorIsNil)
	}()
	// Wait for configuration to be finished
	<-a.WorkersStarted()
	err := m.Destroy()
	c.Assert(err, jc.ErrorIsNil)
	select {
	case err := <-done:
		c.Assert(err, jc.ErrorIsNil)
	case <-time.After(watcher.Period * 5 / 4):
		// TODO(rog) Fix this so it doesn't wait for so long.
		// https://bugs.launchpad.net/juju-core/+bug/1163983
		c.Fatalf("timed out waiting for agent to terminate")
	}
	err = m.Refresh()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(m.Life(), gc.Equals, state.Dead)
}

func (s *MachineSuite) TestHostUnits(c *gc.C) {
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	ctx, reset := patchDeployContext(c, s.BackingState)
	defer reset()
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// check that unassigned units don't trigger any deployments.
	svc := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
	u0, err := svc.AddUnit()
	c.Assert(err, jc.ErrorIsNil)
	u1, err := svc.AddUnit()
	c.Assert(err, jc.ErrorIsNil)

	ctx.waitDeployed(c)

	// assign u0, check it's deployed.
	err = u0.AssignToMachine(m)
	c.Assert(err, jc.ErrorIsNil)
	ctx.waitDeployed(c, u0.Name())

	// "start the agent" for u0 to prevent short-circuited remove-on-destroy;
	// check that it's kept deployed despite being Dying.
	// lp:1558657
	now := time.Now()
	sInfo := status.StatusInfo{
		Status:  status.Idle,
		Message: "",
		Since:   &now,
	}
	err = u0.SetAgentStatus(sInfo)
	c.Assert(err, jc.ErrorIsNil)
	err = u0.Destroy()
	c.Assert(err, jc.ErrorIsNil)
	ctx.waitDeployed(c, u0.Name())

	// add u1 to the machine, check it's deployed.
	err = u1.AssignToMachine(m)
	c.Assert(err, jc.ErrorIsNil)
	ctx.waitDeployed(c, u0.Name(), u1.Name())

	// make u0 dead; check the deployer recalls the unit and removes it from
	// state.
	err = u0.EnsureDead()
	c.Assert(err, jc.ErrorIsNil)
	ctx.waitDeployed(c, u1.Name())

	// The deployer actually removes the unit just after
	// removing its deployment, so we need to poll here
	// until it actually happens.
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		if !attempt.HasNext() {
			c.Fatalf("timeout waiting for unit %q to be removed", u0.Name())
		}
		if err := u0.Refresh(); err == nil {
			c.Logf("waiting for unit %q to be removed...", u0.Name())
			continue
		} else {
			c.Assert(err, jc.Satisfies, errors.IsNotFound)
			break
		}
	}

	// short-circuit-remove u1 after it's been deployed; check it's recalled
	// and removed from state.
	err = u1.Destroy()
	c.Assert(err, jc.ErrorIsNil)
	err = u1.Refresh()
	c.Assert(err, jc.Satisfies, errors.IsNotFound)
	ctx.waitDeployed(c)
}

func (s *MachineSuite) TestManageModel(c *gc.C) {
	usefulVersion := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: "quantal", // to match the charm created below
	}
	envtesting.AssertUploadFakeToolsVersions(c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), usefulVersion)
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	op := make(chan dummy.Operation, 200)
	dummy.Listen(op)

	a := s.newAgent(c, m)
	// Make sure the agent is stopped even if the test fails.
	defer a.Stop()
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()
	c.Logf("started test agent, waiting for workers...")
	r0 := s.singularRecord.nextRunner(c)
	r0.waitForWorker(c, "txnpruner")

	// Check that the provisioner and firewaller are alive by doing
	// a rudimentary check that they respond to state changes.

	// Create an exposed service, and add a unit.
	charm := s.AddTestingCharm(c, "dummy")
	svc := s.AddTestingService(c, "test-service", charm)
	err := svc.SetExposed()
	c.Assert(err, jc.ErrorIsNil)
	units, err := juju.AddUnits(s.State, svc, svc.Name(), 1, nil)
	c.Assert(err, jc.ErrorIsNil)

	// It should be allocated to a machine, which should then be provisioned.
	c.Logf("service %q added with 1 unit, waiting for unit %q's machine to be started...", svc.Name(), units[0].Name())
	c.Check(opRecvTimeout(c, s.State, op, dummy.OpStartInstance{}), gc.NotNil)
	c.Logf("machine hosting unit %q started, waiting for the unit to be deployed...", units[0].Name())
	s.waitProvisioned(c, units[0])

	// Open a port on the unit; it should be handled by the firewaller.
	c.Logf("unit %q deployed, opening port tcp/999...", units[0].Name())
	err = units[0].OpenPort("tcp", 999)
	c.Assert(err, jc.ErrorIsNil)
	c.Check(opRecvTimeout(c, s.State, op, dummy.OpOpenPorts{}), gc.NotNil)
	c.Logf("unit %q port tcp/999 opened, cleaning up...", units[0].Name())

	err = a.Stop()
	c.Assert(err, jc.ErrorIsNil)
	select {
	case err := <-done:
		c.Assert(err, jc.ErrorIsNil)
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for agent to terminate")
	}
	c.Logf("test agent stopped successfully.")
}

func (s *MachineSuite) TestManageModelRunsInstancePoller(c *gc.C) {
	s.AgentSuite.PatchValue(&instancepoller.ShortPoll, 500*time.Millisecond)
	usefulVersion := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: "quantal", // to match the charm created below
	}
	envtesting.AssertUploadFakeToolsVersions(
		c, s.DefaultToolsStorage,
		s.Environ.Config().AgentStream(),
		s.Environ.Config().AgentStream(),
		usefulVersion,
	)
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	defer a.Stop()
	go func() {
		c.Check(a.Run(nil), jc.ErrorIsNil)
	}()

	// Add one unit to a service;
	charm := s.AddTestingCharm(c, "dummy")
	svc := s.AddTestingService(c, "test-service", charm)
	units, err := juju.AddUnits(s.State, svc, svc.Name(), 1, nil)
	c.Assert(err, jc.ErrorIsNil)

	m, instId := s.waitProvisioned(c, units[0])
	insts, err := s.Environ.Instances([]instance.Id{instId})
	c.Assert(err, jc.ErrorIsNil)
	addrs := network.NewAddresses("1.2.3.4")
	dummy.SetInstanceAddresses(insts[0], addrs)
	dummy.SetInstanceStatus(insts[0], "running")

	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		if !attempt.HasNext() {
			c.Logf("final machine addresses: %#v", m.Addresses())
			c.Fatalf("timed out waiting for machine to get address")
		}
		err := m.Refresh()
		c.Assert(err, jc.ErrorIsNil)
		instStatus, err := m.InstanceStatus()
		c.Assert(err, jc.ErrorIsNil)
		c.Logf("found status is %q %q", instStatus.Status, instStatus.Message)
		if reflect.DeepEqual(m.Addresses(), addrs) && instStatus.Message == "running" {
			c.Logf("machine %q address updated: %+v", m.Id(), addrs)
			break
		}
		c.Logf("waiting for machine %q address to be updated", m.Id())
	}
}

func (s *MachineSuite) TestManageModelRunsPeergrouper(c *gc.C) {
	started := newSignal()
	s.AgentSuite.PatchValue(&peergrouperNew, func(st *state.State, _ bool) (worker.Worker, error) {
		c.Check(st, gc.NotNil)
		started.trigger()
		return newDummyWorker(), nil
	})
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	defer a.Stop()
	go func() {
		c.Check(a.Run(nil), jc.ErrorIsNil)
	}()
	started.assertTriggered(c, "peergrouperworker to start")
}

func (s *MachineSuite) TestManageModelRunsDbLogPrunerIfFeatureFlagEnabled(c *gc.C) {
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()

	runner := s.singularRecord.nextRunner(c)
	runner.waitForWorker(c, "dblogpruner")
}

func (s *MachineSuite) TestManageModelCallsUseMultipleCPUs(c *gc.C) {
	// If it has been enabled, the JobManageModel agent should call utils.UseMultipleCPUs
	usefulVersion := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: "quantal", // to match the charm created below
	}
	envtesting.AssertUploadFakeToolsVersions(
		c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), usefulVersion)
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	calledChan := make(chan struct{}, 1)
	s.AgentSuite.PatchValue(&useMultipleCPUs, func() { calledChan <- struct{}{} })
	// Now, start the agent, and observe that a JobManageModel agent
	// calls UseMultipleCPUs
	a := s.newAgent(c, m)
	defer a.Stop()
	go func() {
		c.Check(a.Run(nil), jc.ErrorIsNil)
	}()
	// Wait for configuration to be finished
	<-a.WorkersStarted()
	s.assertChannelActive(c, calledChan, "UseMultipleCPUs() to be called")

	c.Check(a.Stop(), jc.ErrorIsNil)
	// However, an agent that just JobHostUnits doesn't call UseMultipleCPUs
	m2, _, _ := s.primeAgent(c, state.JobHostUnits)
	a2 := s.newAgent(c, m2)
	defer a2.Stop()
	go func() {
		c.Check(a2.Run(nil), jc.ErrorIsNil)
	}()
	// Wait until all the workers have been started, and then kill everything
	<-a2.workersStarted
	c.Check(a2.Stop(), jc.ErrorIsNil)
	s.assertChannelInactive(c, calledChan, "UseMultipleCPUs() was called")
}

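// waitProvisioned waits for the unit's assigned machine to be provisioned,
// syncing state until the machine reports an instance ID or the test times out.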
func (s *MachineSuite) waitProvisioned(c *gc.C, unit *state.Unit) (*state.Machine, instance.Id) {
	c.Logf("waiting for unit %q to be provisioned", unit)
	machineId, err := unit.AssignedMachineId()
	c.Assert(err, jc.ErrorIsNil)
	m, err := s.State.Machine(machineId)
	c.Assert(err, jc.ErrorIsNil)
	w := m.Watch()
	defer worker.Stop(w)
	timeout := time.After(coretesting.LongWait)
	for {
		select {
		case <-timeout:
			c.Fatalf("timed out waiting for provisioning")
		case <-time.After(coretesting.ShortWait):
			s.State.StartSync()
		case _, ok := <-w.Changes():
			c.Assert(ok, jc.IsTrue)
			err := m.Refresh()
			c.Assert(err, jc.ErrorIsNil)
			if instId, err := m.InstanceId(); err == nil {
				c.Logf("unit provisioned with instance %s", instId)
				return m, instId
			} else {
				c.Check(err, jc.Satisfies, errors.IsNotProvisioned)
			}
		}
	}
}

func (s *MachineSuite) testUpgradeRequest(c *gc.C, agent runner, tag string, currentTools *tools.Tools) {
	newVers := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: series.HostSeries(),
	}
	newVers.Patch++
	newTools := envtesting.AssertUploadFakeToolsVersions(
		c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), newVers)[0]
	err := s.State.SetModelAgentVersion(newVers.Number)
	c.Assert(err, jc.ErrorIsNil)
	err = runWithTimeout(agent)
	envtesting.CheckUpgraderReadyError(c, err, &upgrader.UpgradeReadyError{
		AgentName: tag,
		OldTools:  currentTools.Version,
		NewTools:  newTools.Version,
		DataDir:   s.DataDir(),
	})
}

func (s *MachineSuite) TestUpgradeRequest(c *gc.C) {
	m, _, currentTools := s.primeAgent(c, state.JobManageModel, state.JobHostUnits)
	a := s.newAgent(c, m)
	s.testUpgradeRequest(c, a, m.Tag().String(), currentTools)
	c.Assert(a.isInitialUpgradeCheckPending(), jc.IsTrue)
}

func (s *MachineSuite) TestNoUpgradeRequired(c *gc.C) {
	m, _, _ := s.primeAgent(c, state.JobManageModel, state.JobHostUnits)
	a := s.newAgent(c, m)
	done := make(chan error)
	go func() { done <- a.Run(nil) }()
	select {
	case <-a.initialUpgradeCheckComplete.Unlocked():
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timeout waiting for upgrade check")
	}
	defer a.Stop() // in case of failure
	s.waitStopped(c, state.JobManageModel, a, done)
	c.Assert(a.isInitialUpgradeCheckPending(), jc.IsFalse)
}

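// waitStopped stops the agent and waits for its Run goroutine to return,
// tolerating shutdown-ordering errors for JobManageModel agents.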
func (s *MachineSuite) waitStopped(c *gc.C, job state.MachineJob, a *MachineAgent, done chan error) {
	err := a.Stop()
	if job == state.JobManageModel {
		// When shutting down, the API server can be shut down before
		// the other workers that connect to it, so they get an error and
		// then die, causing Stop to return an error. It's not
		// easy to control the actual error that's received in this
		// circumstance so we just log it rather than asserting that it
		// is not nil.
		if err != nil {
			c.Logf("error shutting down state manager: %v", err)
		}
	} else {
		c.Assert(err, jc.ErrorIsNil)
	}

	select {
	case err := <-done:
		c.Assert(err, jc.ErrorIsNil)
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for agent to terminate")
	}
}

func (s *MachineSuite) assertJobWithState(
	c *gc.C,
	job state.MachineJob,
	test func(agent.Config, *state.State),
) {
	paramsJob := job.ToParams()
	if !paramsJob.NeedsState() {
		c.Fatalf("%v does not use state", paramsJob)
	}
	s.assertAgentOpensState(c, job, test)
}

// assertAgentOpensState asserts that a machine agent started with the
// given job opens the state. The agent's configuration and the agent's
// state.State are then passed to the test function for further checking.
func (s *MachineSuite) assertAgentOpensState(c *gc.C, job state.MachineJob, test func(agent.Config, *state.State)) {
	stm, conf, _ := s.primeAgent(c, job)
	a := s.newAgent(c, stm)
	defer a.Stop()
	logger.Debugf("new agent %#v", a)

	// All state jobs currently also run an APIWorker, so no
	// need to check for that here, like in assertJobWithState.
	st, done := s.waitForOpenState(c, a)
	test(conf, st)
	s.waitStopped(c, job, a, done)
}

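// waitForOpenState runs the agent and returns the *state.State it opens,
// along with the channel that will receive the result of Run.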
func (s *MachineSuite) waitForOpenState(c *gc.C, a *MachineAgent) (*state.State, chan error) {
	agentAPIs := make(chan *state.State, 1)
	s.AgentSuite.PatchValue(&reportOpenedState, func(st *state.State) {
		select {
		case agentAPIs <- st:
		default:
		}
	})

	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()

	select {
	case agentAPI := <-agentAPIs:
		c.Assert(agentAPI, gc.NotNil)
		return agentAPI, done
	case <-time.After(coretesting.LongWait):
		c.Fatalf("API not opened")
	}
	panic("can't happen")
}

func (s *MachineSuite) TestManageModelServesAPI(c *gc.C) {
	s.assertJobWithState(c, state.JobManageModel, func(conf agent.Config, agentState *state.State) {
		apiInfo, ok := conf.APIInfo()
		c.Assert(ok, jc.IsTrue)
		st, err := api.Open(apiInfo, fastDialOpts)
		c.Assert(err, jc.ErrorIsNil)
		defer st.Close()
		m, err := apimachiner.NewState(st).Machine(conf.Tag().(names.MachineTag))
		c.Assert(err, jc.ErrorIsNil)
		c.Assert(m.Life(), gc.Equals, params.Alive)
	})
}

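// assertAgentSetsToolsVersion primes the machine with a different tools
// version and checks that the running agent records its actual version
// (jujuversion.Current) in state.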
func (s *MachineSuite) assertAgentSetsToolsVersion(c *gc.C, job state.MachineJob) {
	vers := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: series.HostSeries(),
	}
	vers.Minor++
	m, _, _ := s.primeAgentVersion(c, vers, job)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	timeout := time.After(coretesting.LongWait)
	for done := false; !done; {
		select {
		case <-timeout:
			c.Fatalf("timeout while waiting for agent version to be set")
		case <-time.After(coretesting.ShortWait):
			c.Log("Refreshing")
			err := m.Refresh()
			c.Assert(err, jc.ErrorIsNil)
			c.Log("Fetching agent tools")
			agentTools, err := m.AgentTools()
			c.Assert(err, jc.ErrorIsNil)
			c.Logf("(%v vs. %v)", agentTools.Version, jujuversion.Current)
			if agentTools.Version.Minor != jujuversion.Current.Minor {
				continue
			}
			c.Assert(agentTools.Version.Number, gc.DeepEquals, jujuversion.Current)
			done = true
		}
	}
}

func (s *MachineSuite) TestAgentSetsToolsVersionManageModel(c *gc.C) {
	s.assertAgentSetsToolsVersion(c, state.JobManageModel)
}

func (s *MachineSuite) TestAgentSetsToolsVersionHostUnits(c *gc.C) {
	s.assertAgentSetsToolsVersion(c, state.JobHostUnits)
}

func (s *MachineSuite) TestManageModelRunsCleaner(c *gc.C) {
	s.assertJobWithState(c, state.JobManageModel, func(conf agent.Config, agentState *state.State) {
		// Create a service and unit, and destroy the service.
		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
		unit, err := service.AddUnit()
		c.Assert(err, jc.ErrorIsNil)
		err = service.Destroy()
		c.Assert(err, jc.ErrorIsNil)

		// Check the unit was not yet removed.
		err = unit.Refresh()
		c.Assert(err, jc.ErrorIsNil)
		w := unit.Watch()
		defer worker.Stop(w)

		// Trigger a sync on the state used by the agent, and wait
		// for the unit to be removed.
		agentState.StartSync()
		timeout := time.After(coretesting.LongWait)
		for done := false; !done; {
			select {
			case <-timeout:
				c.Fatalf("unit not cleaned up")
			case <-time.After(coretesting.ShortWait):
				s.State.StartSync()
			case <-w.Changes():
				err := unit.Refresh()
				if errors.IsNotFound(err) {
					done = true
				} else {
					c.Assert(err, jc.ErrorIsNil)
				}
			}
		}
	})
}

func (s *MachineSuite) TestJobManageModelRunsMinUnitsWorker(c *gc.C) {
	s.assertJobWithState(c, state.JobManageModel, func(_ agent.Config, agentState *state.State) {
		// Ensure that the MinUnits worker is alive by doing a simple check
		// that it responds to state changes: add a service, set its minimum
		// number of units to one, wait for the worker to add the missing unit.
		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
		err := service.SetMinUnits(1)
		c.Assert(err, jc.ErrorIsNil)
		w := service.Watch()
		defer worker.Stop(w)

		// Trigger a sync on the state used by the agent, and wait for the unit
		// to be created.
		agentState.StartSync()
		timeout := time.After(coretesting.LongWait)
		for {
			select {
			case <-timeout:
				c.Fatalf("unit not created")
			case <-time.After(coretesting.ShortWait):
				s.State.StartSync()
			case <-w.Changes():
				units, err := service.AllUnits()
				c.Assert(err, jc.ErrorIsNil)
				if len(units) == 1 {
					return
				}
			}
		}
	})
}

func (s *MachineSuite) TestMachineAgentRunsAuthorisedKeysWorker(c *gc.C) {
	// TODO(bogdanteleaga): Fix once we get authentication worker up on windows
	if runtime.GOOS == "windows" {
		c.Skip("bug 1403084: authentication worker not yet implemented on windows")
	}
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// Update the keys in the environment.
	sshKey := sshtesting.ValidKeyOne.Key + " user@host"
	err := s.BackingState.UpdateModelConfig(map[string]interface{}{"authorized-keys": sshKey}, nil, nil)
	c.Assert(err, jc.ErrorIsNil)

	// Wait for ssh keys file to be updated.
	s.State.StartSync()
	timeout := time.After(coretesting.LongWait)
	sshKeyWithCommentPrefix := sshtesting.ValidKeyOne.Key + " Juju:user@host"
	for {
		select {
		case <-timeout:
			c.Fatalf("timeout while waiting for authorised ssh keys to change")
		case <-time.After(coretesting.ShortWait):
			s.State.StartSync()
			keys, err := ssh.ListKeys(authenticationworker.SSHUser, ssh.FullKeys)
			c.Assert(err, jc.ErrorIsNil)
			keysStr := strings.Join(keys, "\n")
			if sshKeyWithCommentPrefix != keysStr {
				continue
			}
			return
		}
	}
}

func (s *MachineSuite) TestMachineAgentSymlinks(c *gc.C) {
	stm, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, stm)
	defer a.Stop()
	_, done := s.waitForOpenState(c, a)

	// Symlinks should have been created
	for _, link := range []string{jujuRun, jujuDumpLogs} {
		_, err := os.Stat(utils.EnsureBaseDir(a.rootDir, link))
		c.Assert(err, jc.ErrorIsNil, gc.Commentf(link))
	}

	s.waitStopped(c, state.JobManageModel, a, done)
}

func (s *MachineSuite) TestMachineAgentSymlinkJujuRunExists(c *gc.C) {
	if runtime.GOOS == "windows" {
		// Cannot make a symlink to a nonexistent file on windows, or
		// create a file, point a symlink to it, and then remove it.
		c.Skip("Cannot test this on windows")
	}

	stm, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, stm)
	defer a.Stop()

	// Pre-create the symlinks, but pointing to an incorrect location.
	links := []string{jujuRun, jujuDumpLogs}
	a.rootDir = c.MkDir()
	for _, link := range links {
		fullLink := utils.EnsureBaseDir(a.rootDir, link)
		c.Assert(os.MkdirAll(filepath.Dir(fullLink), os.FileMode(0755)), jc.ErrorIsNil)
		c.Assert(symlink.New("/nowhere/special", fullLink), jc.ErrorIsNil, gc.Commentf(link))
	}

	// Start the agent and wait for it to be running.
	_, done := s.waitForOpenState(c, a)

	// juju-run symlink should have been recreated.
	for _, link := range links {
		fullLink := utils.EnsureBaseDir(a.rootDir, link)
		linkTarget, err := symlink.Read(fullLink)
		c.Assert(err, jc.ErrorIsNil)
		c.Assert(linkTarget, gc.Not(gc.Equals), "/nowhere/special", gc.Commentf(link))
	}

	s.waitStopped(c, state.JobManageModel, a, done)
}

func (s *MachineSuite) TestMachineAgentUninstall(c *gc.C) {
	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
	err := m.EnsureDead()
	c.Assert(err, jc.ErrorIsNil)
	a := s.newAgent(c, m)
	err = runWithTimeout(a)
	c.Assert(err, jc.ErrorIsNil)

	// juju-run and juju-dumplogs symlinks should have been removed on
	// termination.
	for _, link := range []string{jujuRun, jujuDumpLogs} {
		_, err = os.Stat(utils.EnsureBaseDir(a.rootDir, link))
		c.Assert(err, jc.Satisfies, os.IsNotExist)
	}

	// data-dir should have been removed on termination
	_, err = os.Stat(ac.DataDir())
	c.Assert(err, jc.Satisfies, os.IsNotExist)
}

func (s *MachineSuite) TestMachineAgentRunsAPIAddressUpdaterWorker(c *gc.C) {
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// Update the API addresses.
	updatedServers := [][]network.HostPort{
		network.NewHostPorts(1234, "localhost"),
	}
	err := s.BackingState.SetAPIHostPorts(updatedServers)
	c.Assert(err, jc.ErrorIsNil)

	// Wait for config to be updated.
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		s.BackingState.StartSync()
		if !attempt.HasNext() {
			break
		}
		addrs, err := a.CurrentConfig().APIAddresses()
		c.Assert(err, jc.ErrorIsNil)
		if reflect.DeepEqual(addrs, []string{"localhost:1234"}) {
			return
		}
	}
	c.Fatalf("timeout while waiting for agent config to change")
}

func (s *MachineSuite) TestMachineAgentRunsDiskManagerWorker(c *gc.C) {
	// Patch out the worker func before starting the agent.
	started := newSignal()
	newWorker := func(diskmanager.ListBlockDevicesFunc, diskmanager.BlockDeviceSetter) worker.Worker {
		started.trigger()
		return worker.NewNoOpWorker()
	}
	s.PatchValue(&diskmanager.NewWorker, newWorker)

	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	started.assertTriggered(c, "diskmanager worker to start")
}

func (s *MachineSuite) TestMongoUpgradeWorker(c *gc.C) {
	// Patch out the worker func before starting the agent.
	started := make(chan struct{})
	newWorker := func(*state.State, string, mongoupgrader.StopMongo) (worker.Worker, error) {
		close(started)
		return worker.NewNoOpWorker(), nil
	}
	s.PatchValue(&newUpgradeMongoWorker, newWorker)

	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// Wait for worker to be started.
	s.State.StartSync()
	select {
	case <-started:
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timeout while waiting for mongo upgrader worker to start")
	}
}

func (s *MachineSuite) TestDiskManagerWorkerUpdatesState(c *gc.C) {
	expected := []storage.BlockDevice{{DeviceName: "whatever"}}
	s.PatchValue(&diskmanager.DefaultListBlockDevices, func() ([]storage.BlockDevice, error) {
		return expected, nil
	})

	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// Wait for state to be updated.
	s.BackingState.StartSync()
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		devices, err := s.BackingState.BlockDevices(m.MachineTag())
		c.Assert(err, jc.ErrorIsNil)
		if len(devices) > 0 {
			c.Assert(devices, gc.HasLen, 1)
			c.Assert(devices[0].DeviceName, gc.Equals, expected[0].DeviceName)
			return
		}
	}
	c.Fatalf("timeout while waiting for block devices to be recorded")
}

func (s *MachineSuite) TestMachineAgentDoesNotRunMetadataWorkerForHostUnits(c *gc.C) {
	s.checkMetadataWorkerNotRun(c, state.JobHostUnits, "can host units")
}

func (s *MachineSuite) TestMachineAgentDoesNotRunMetadataWorkerForNonSimpleStreamDependentProviders(c *gc.C) {
	s.checkMetadataWorkerNotRun(c, state.JobManageModel, "has provider which doesn't depend on simple streams")
}

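// checkMetadataWorkerNotRun starts an agent for the given job and asserts
// that the image metadata update worker is never started; suffix describes
// why the worker is not expected to run.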
func (s *MachineSuite) checkMetadataWorkerNotRun(c *gc.C, job state.MachineJob, suffix string) {
	// Patch out the worker func before starting the agent.
	started := newSignal()
	newWorker := func(cl *imagemetadata.Client) worker.Worker {
		started.trigger()
		return worker.NewNoOpWorker()
	}
	s.PatchValue(&newMetadataUpdater, newWorker)

	// Start the machine agent.
	m, _, _ := s.primeAgent(c, job)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	started.assertNotTriggered(c, startWorkerWait, "metadata update worker started")
}

func (s *MachineSuite) TestMachineAgentRunsMachineStorageWorker(c *gc.C) {
	m, _, _ := s.primeAgent(c, state.JobHostUnits)

	started := newSignal()
	newWorker := func(config storageprovisioner.Config) (worker.Worker, error) {
		c.Check(config.Scope, gc.Equals, m.Tag())
		c.Check(config.Validate(), jc.ErrorIsNil)
		started.trigger()
		return worker.NewNoOpWorker(), nil
	}
	s.PatchValue(&storageprovisioner.NewStorageProvisioner, newWorker)

	// Start the machine agent.
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	started.assertTriggered(c, "storage worker to start")
}

func (s *MachineSuite) TestMachineAgentRunsCertificateUpdateWorkerForController(c *gc.C) {
	started := newSignal()
	newUpdater := func(certupdater.AddressWatcher, certupdater.StateServingInfoGetter, certupdater.ControllerConfigGetter,
		certupdater.APIHostPortsGetter, certupdater.StateServingInfoSetter,
	) worker.Worker {
		started.trigger()
		return worker.NewNoOpWorker()
	}
	s.PatchValue(&newCertificateUpdater, newUpdater)

	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	started.assertTriggered(c, "certificate to be updated")
}

func (s *MachineSuite) TestMachineAgentDoesNotRunCertificateUpdateWorkerForNonController(c *gc.C) {
	started := newSignal()
	newUpdater := func(certupdater.AddressWatcher, certupdater.StateServingInfoGetter, certupdater.ControllerConfigGetter,
		certupdater.APIHostPortsGetter, certupdater.StateServingInfoSetter,
	) worker.Worker {
		started.trigger()
		return worker.NewNoOpWorker()
	}
	s.PatchValue(&newCertificateUpdater, newUpdater)

	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	started.assertNotTriggered(c, startWorkerWait, "certificate was updated")
}

func (s *MachineSuite) TestCertificateUpdateWorkerUpdatesCertificate(c *gc.C) {
	// Set up the machine agent.
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	a.ReadConfig(names.NewMachineTag(m.Id()).String())

	// Set up a check that the certificate has been updated.
	updated := make(chan struct{})
	go func() {
		for {
			stateInfo, _ := a.CurrentConfig().StateServingInfo()
			srvCert, err := cert.ParseCert(stateInfo.Cert)
			if !c.Check(err, jc.ErrorIsNil) {
				break
			}
			sanIPs := make([]string, len(srvCert.IPAddresses))
			for i, ip := range srvCert.IPAddresses {
				sanIPs[i] = ip.String()
			}
			if len(sanIPs) == 1 && sanIPs[0] == "0.1.2.3" {
				close(updated)
				break
			}
			time.Sleep(100 * time.Millisecond)
		}
	}()

	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	s.assertChannelActive(c, updated, "certificate to be updated")
}

func (s *MachineSuite) TestCertificateDNSUpdated(c *gc.C) {
	// Disable the certificate worker so it doesn't update the certificate.
	newUpdater := func(certupdater.AddressWatcher, certupdater.StateServingInfoGetter, certupdater.ControllerConfigGetter,
		certupdater.APIHostPortsGetter, certupdater.StateServingInfoSetter,
	) worker.Worker {
		return worker.NewNoOpWorker()
	}
	s.PatchValue(&newCertificateUpdater, newUpdater)

	// Set up the machine agent.
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)

	// Set up a check that the certificate has been updated when the agent starts.
	updated := make(chan struct{})
	expectedDnsNames := set.NewStrings("local", "juju-apiserver", "juju-mongodb")
	go func() {
		for {
			stateInfo, _ := a.CurrentConfig().StateServingInfo()
			srvCert, err := cert.ParseCert(stateInfo.Cert)
			c.Assert(err, jc.ErrorIsNil)
			certDnsNames := set.NewStrings(srvCert.DNSNames...)
			if !expectedDnsNames.Difference(certDnsNames).IsEmpty() {
				continue
			}
			pemContent, err := ioutil.ReadFile(filepath.Join(s.DataDir(), "server.pem"))
			c.Assert(err, jc.ErrorIsNil)
			if string(pemContent) == stateInfo.Cert+"\n"+stateInfo.PrivateKey {
				close(updated)
				break
			}
			time.Sleep(10 * time.Millisecond)
		}
	}()

	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	s.assertChannelActive(c, updated, "certificate to be updated")
}

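// setupIgnoreAddresses patches machiner.NewMachiner to capture the
// ClearMachineAddressesOnStart value it is given, sets the
// ignore-machine-addresses model config to expectedIgnoreValue, and
// returns the channel on which the captured value is reported.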
func (s *MachineSuite) setupIgnoreAddresses(c *gc.C, expectedIgnoreValue bool) chan bool {
	ignoreAddressCh := make(chan bool, 1)
	s.AgentSuite.PatchValue(&machiner.NewMachiner, func(cfg machiner.Config) (worker.Worker, error) {
		select {
		case ignoreAddressCh <- cfg.ClearMachineAddressesOnStart:
		default:
		}

		// The test just cares that NewMachiner is called with the correct
		// value; nothing else is done with the worker.
		return newDummyWorker(), nil
	})

	attrs := coretesting.Attrs{"ignore-machine-addresses": expectedIgnoreValue}
	err := s.BackingState.UpdateModelConfig(attrs, nil, nil)
	c.Assert(err, jc.ErrorIsNil)
	return ignoreAddressCh
}

func (s *MachineSuite) TestMachineAgentIgnoreAddresses(c *gc.C) {
	for _, expectedIgnoreValue := range []bool{true, false} {
		ignoreAddressCh := s.setupIgnoreAddresses(c, expectedIgnoreValue)

		m, _, _ := s.primeAgent(c, state.JobHostUnits)
		a := s.newAgent(c, m)
		defer a.Stop()
		doneCh := make(chan error)
		go func() {
			doneCh <- a.Run(nil)
		}()

		select {
		case ignoreMachineAddresses := <-ignoreAddressCh:
			if ignoreMachineAddresses != expectedIgnoreValue {
				c.Fatalf("expected ignore-machine-addresses = %v, got = %v", expectedIgnoreValue, ignoreMachineAddresses)
			}
		case <-time.After(coretesting.LongWait):
			c.Fatalf("timed out waiting for the machiner to start")
		}
		s.waitStopped(c, state.JobHostUnits, a, doneCh)
	}
}

func (s *MachineSuite) TestMachineAgentIgnoreAddressesContainer(c *gc.C) {
	ignoreAddressCh := s.setupIgnoreAddresses(c, true)

	parent, err := s.State.AddMachine("quantal", state.JobHostUnits)
	c.Assert(err, jc.ErrorIsNil)
	m, err := s.State.AddMachineInsideMachine(
		state.MachineTemplate{
			Series: "trusty",
			Jobs:   []state.MachineJob{state.JobHostUnits},
		},
		parent.Id(),
		instance.LXD,
	)
	c.Assert(err, jc.ErrorIsNil)

	vers := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: series.HostSeries(),
	}
	s.primeAgentWithMachine(c, m, vers)
	a := s.newAgent(c, m)
	defer a.Stop()
	doneCh := make(chan error)
	go func() {
		doneCh <- a.Run(nil)
	}()

	select {
	case ignoreMachineAddresses := <-ignoreAddressCh:
		if ignoreMachineAddresses {
			c.Fatalf("expected ignore-machine-addresses = false, got = true")
		}
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for the machiner to start")
	}
	s.waitStopped(c, state.JobHostUnits, a, doneCh)
}

func (s *MachineSuite) TestMachineAgentSetsPrepareRestore(c *gc.C) {
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	c.Check(a.IsRestorePreparing(), jc.IsFalse)
	c.Check(a.IsRestoreRunning(), jc.IsFalse)
	err := a.PrepareRestore()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(a.IsRestorePreparing(), jc.IsTrue)
	c.Assert(a.IsRestoreRunning(), jc.IsFalse)
	err = a.PrepareRestore()
	c.Assert(err, gc.ErrorMatches, "already in restore mode")
}

func (s *MachineSuite) TestMachineAgentSetsRestoreInProgress(c *gc.C) {
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	c.Check(a.IsRestorePreparing(), jc.IsFalse)
	c.Check(a.IsRestoreRunning(), jc.IsFalse)
	err := a.PrepareRestore()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(a.IsRestorePreparing(), jc.IsTrue)
	err = a.BeginRestore()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(a.IsRestoreRunning(), jc.IsTrue)
	err = a.BeginRestore()
	c.Assert(err, gc.ErrorMatches, "already restoring")
}

func (s *MachineSuite) TestMachineAgentRestoreRequiresPrepare(c *gc.C) {
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	c.Check(a.IsRestorePreparing(), jc.IsFalse)
	c.Check(a.IsRestoreRunning(), jc.IsFalse)
	err := a.BeginRestore()
	c.Assert(err, gc.ErrorMatches, "not in restore mode, cannot begin restoration")
	c.Assert(a.IsRestoreRunning(), jc.IsFalse)
}

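// TestMachineWorkers checks that a unit-hosting machine agent brings up the
// expected set of machine workers once its dependency engine stabilises.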
func (s *MachineSuite) TestMachineWorkers(c *gc.C) {
	tracker := NewEngineTracker()
	instrumented := TrackMachines(c, tracker, machineManifolds)
	s.PatchValue(&machineManifolds, instrumented)

	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// Wait for it to stabilise, running as normal.
	matcher := NewWorkerMatcher(c, tracker, a.Tag().String(),
		append(alwaysMachineWorkers, notMigratingMachineWorkers...))
	WaitMatch(c, matcher.Check, coretesting.LongWait, s.BackingState.StartSync)
}

func (s *MachineSuite) TestControllerModelWorkers(c *gc.C) {
	uuid := s.BackingState.ModelUUID()

	tracker := NewEngineTracker()
	instrumented := TrackModels(c, tracker, modelManifolds)
	s.PatchValue(&modelManifolds, instrumented)

	matcher := NewWorkerMatcher(c, tracker, uuid,
		append(alwaysModelWorkers, aliveModelWorkers...))
	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
		WaitMatch(c, matcher.Check, coretesting.LongWait, s.BackingState.StartSync)
	})
}

func (s *MachineSuite) TestHostedModelWorkers(c *gc.C) {
	// The dummy provider blows up in the face of multi-model
	// scenarios so patch in a minimal environs.Environ that's good
	// enough to allow the model workers to run.
	s.PatchValue(&newEnvirons, func(environs.OpenParams) (environs.Environ, error) {
		return &minModelWorkersEnviron{}, nil
	})

	st, closer := s.setUpNewModel(c)
	defer closer()
	uuid := st.ModelUUID()

	tracker := NewEngineTracker()
	instrumented := TrackModels(c, tracker, modelManifolds)
	s.PatchValue(&modelManifolds, instrumented)

	matcher := NewWorkerMatcher(c, tracker, uuid,
		append(alwaysModelWorkers, aliveModelWorkers...))
	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
		WaitMatch(c, matcher.Check, ReallyLongWait, st.StartSync)
	})
}

func (s *MachineSuite) TestMigratingModelWorkers(c *gc.C) {
	st, closer := s.setUpNewModel(c)
	defer closer()
	uuid := st.ModelUUID()

	tracker := NewEngineTracker()

	// Replace the real migrationmaster worker with a fake one which
	// does nothing. This is required to make this test reliable, as
	// the environment required for the migrationmaster to operate
	// correctly is too involved to set up from here.
	//
	// TODO(mjs) - an alternative might be to provide a fake Facade
	// and api.Open to the real migrationmaster but this test is
	// awfully far away from the low level details of the worker.
	origModelManifolds := modelManifolds
	modelManifoldsDisablingMigrationMaster := func(config model.ManifoldsConfig) dependency.Manifolds {
		config.NewMigrationMaster = func(config migrationmaster.Config) (worker.Worker, error) {
			return &nullWorker{}, nil
		}
		return origModelManifolds(config)
	}
	instrumented := TrackModels(c, tracker, modelManifoldsDisablingMigrationMaster)
	s.PatchValue(&modelManifolds, instrumented)

	targetControllerTag := names.NewControllerTag(utils.MustNewUUID().String())
	_, err := st.CreateMigration(state.MigrationSpec{
		InitiatedBy: names.NewUserTag("admin"),
		TargetInfo: migration.TargetInfo{
			ControllerTag: targetControllerTag,
			Addrs:         []string{"1.2.3.4:5555"},
			CACert:        "cert",
			AuthTag:       names.NewUserTag("user"),
			Password:      "password",
		},
	})
	c.Assert(err, jc.ErrorIsNil)

	matcher := NewWorkerMatcher(c, tracker, uuid,
		append(alwaysModelWorkers, migratingModelWorkers...))
	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
		WaitMatch(c, matcher.Check, ReallyLongWait, st.StartSync)
	})
}

func (s *MachineSuite) TestDyingModelCleanedUp(c *gc.C) {
	st, closer := s.setUpNewModel(c)
	defer closer()

	timeout := time.After(ReallyLongWait)
	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
		model, err := st.Model()
		c.Assert(err, jc.ErrorIsNil)
		watch := model.Watch()
		defer workertest.CleanKill(c, watch)

		err = model.Destroy()
		c.Assert(err, jc.ErrorIsNil)
		for {
			select {
			case <-watch.Changes():
				err := model.Refresh()
				cause := errors.Cause(err)
				if err == nil {
					continue // still there
				} else if errors.IsNotFound(cause) {
					return // successfully removed
				}
				c.Assert(err, jc.ErrorIsNil) // guaranteed fail
			case <-time.After(coretesting.ShortWait):
				st.StartSync()
			case <-timeout:
				c.Fatalf("timed out waiting for workers")
			}
		}
	})
}

func (s *MachineSuite) TestModelWorkersRespectSingularResponsibilityFlag(c *gc.C) {
	// Grab responsibility for the model on behalf of another machine.
	claimer := s.BackingState.SingularClaimer()
	uuid := s.BackingState.ModelUUID()
	err := claimer.Claim(uuid, "machine-999-lxd-99", time.Hour)
	c.Assert(err, jc.ErrorIsNil)

	// Then run a normal model-tracking test, just checking for
	// a different set of workers.
	tracker := NewEngineTracker()
	instrumented := TrackModels(c, tracker, modelManifolds)
	s.PatchValue(&modelManifolds, instrumented)

	matcher := NewWorkerMatcher(c, tracker, uuid, alwaysModelWorkers)
	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
		WaitMatch(c, matcher.Check, coretesting.LongWait, s.BackingState.StartSync)
	})
}

func (s *MachineSuite) setUpNewModel(c *gc.C) (newSt *state.State, closer func()) {
	// Create a new model; tests can now watch whether workers start for it.
	newSt = s.Factory.MakeModel(c, nil)
	return newSt, func() {
		err := newSt.Close()
		c.Check(err, jc.ErrorIsNil)
	}
}

func (s *MachineSuite) TestReplicasetInitForNewController(c *gc.C) {
	if runtime.GOOS == "windows" {
		c.Skip("controllers on windows aren't supported")
	}

	s.fakeEnsureMongo.ServiceInstalled = false

	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	agentConfig := a.CurrentConfig()

	err := a.ensureMongoServer(agentConfig)
	c.Assert(err, jc.ErrorIsNil)

	c.Assert(s.fakeEnsureMongo.EnsureCount, gc.Equals, 1)
	c.Assert(s.fakeEnsureMongo.InitiateCount, gc.Equals, 0)
}

// nullWorker is a do-nothing worker used to stand in for real workers in tests.
type nullWorker struct {
	tomb tomb.Tomb
}

func (w *nullWorker) Kill() {
	w.tomb.Kill(nil)
	w.tomb.Done()
}

func (w *nullWorker) Wait() error {
	return w.tomb.Wait()
}