github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/cmd/jujud/reboot/reboot.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Copyright 2014 Cloudbase Solutions SRL 3 // Licensed under the AGPLv3, see LICENCE file for details. 4 5 package reboot 6 7 import ( 8 "io/ioutil" 9 "os" 10 "os/exec" 11 "strings" 12 "time" 13 14 "github.com/juju/errors" 15 "github.com/juju/loggo" 16 "github.com/juju/os/series" 17 "gopkg.in/juju/names.v2" 18 19 "github.com/juju/juju/agent" 20 "github.com/juju/juju/apiserver/params" 21 "github.com/juju/juju/container" 22 "github.com/juju/juju/container/factory" 23 "github.com/juju/juju/core/instance" 24 "github.com/juju/juju/environs/instances" 25 "github.com/juju/juju/service" 26 "github.com/juju/juju/service/common" 27 ) 28 29 var logger = loggo.GetLogger("juju.cmd.jujud.reboot") 30 var timeout = 10 * time.Minute 31 var rebootAfter = 15 32 33 func runCommand(args []string) error { 34 err := exec.Command(args[0], args[1:]...).Run() 35 return errors.Trace(err) 36 } 37 38 var tmpFile = func() (*os.File, error) { 39 f, err := ioutil.TempFile(os.TempDir(), "juju-reboot") 40 return f, errors.Trace(err) 41 } 42 43 // Reboot implements the ExecuteReboot command which will reboot a machine 44 // once all containers have shut down, or a timeout is reached 45 type Reboot struct { 46 acfg agent.Config 47 tag names.MachineTag 48 } 49 50 func NewRebootWaiter(acfg agent.Config) (*Reboot, error) { 51 tag, ok := acfg.Tag().(names.MachineTag) 52 if !ok { 53 return nil, errors.Errorf("Expected names.MachineTag, got: %T --> %v", acfg.Tag(), acfg.Tag()) 54 } 55 return &Reboot{ 56 acfg: acfg, 57 tag: tag, 58 }, nil 59 } 60 61 // ExecuteReboot will wait for all running containers to stop, and then execute 62 // a shutdown or a reboot (based on the action param) 63 func (r *Reboot) ExecuteReboot(action params.RebootAction) error { 64 if err := r.waitForContainersOrTimeout(); err != nil { 65 return errors.Trace(err) 66 } 67 68 // Stop all units before issuing a reboot. During a reboot, the machine agent 69 // will attempt to hold the execution lock until the reboot happens. However, since 70 // the old file based locking method has been replaced with semaphores (Windows), and 71 // sockets (Linux), if the machine agent is killed by the init system during shutdown, 72 // before the unit agents, the lock is released and unit agents start running hooks. 73 // When they in turn are killed, the hook is thrown into error state. If automatic retries 74 // are disabled, the hook remains in error state. 75 if err := r.stopDeployedUnits(); err != nil { 76 return errors.Trace(err) 77 } 78 79 if err := scheduleAction(action, rebootAfter); err != nil { 80 return errors.Trace(err) 81 } 82 83 return nil 84 } 85 86 func (r *Reboot) stopDeployedUnits() error { 87 osVersion, err := series.HostSeries() 88 if err != nil { 89 return errors.Trace(err) 90 } 91 services, err := service.ListServices() 92 if err != nil { 93 return err 94 } 95 for _, svcName := range services { 96 if strings.HasPrefix(svcName, `jujud-unit-`) { 97 svc, err := service.NewService(svcName, common.Conf{}, osVersion) 98 if err != nil { 99 return err 100 } 101 logger.Debugf("Stopping unit agent: %q", svcName) 102 if err = svc.Stop(); err != nil { 103 return err 104 } 105 } 106 } 107 return nil 108 } 109 110 func (r *Reboot) runningContainers() ([]instances.Instance, error) { 111 var runningInstances []instances.Instance 112 modelUUID := r.acfg.Model().Id() 113 for _, val := range instance.ContainerTypes { 114 managerConfig := container.ManagerConfig{ 115 container.ConfigModelUUID: modelUUID, 116 } 117 cfg := managerConfig 118 manager, err := factory.NewContainerManager(val, cfg) 119 if err != nil { 120 return nil, errors.Annotatef(err, "failed to get manager for container type %v", val) 121 } 122 if !manager.IsInitialized() { 123 logger.Infof("container type %q not supported", val) 124 continue 125 } 126 instances, err := manager.ListContainers() 127 if err != nil { 128 return nil, errors.Annotate(err, "failed to list containers") 129 } 130 runningInstances = append(runningInstances, instances...) 131 } 132 return runningInstances, nil 133 } 134 135 func (r *Reboot) waitForContainersOrTimeout() error { 136 c := make(chan error, 1) 137 quit := make(chan bool, 1) 138 go func() { 139 for { 140 select { 141 case <-quit: 142 c <- nil 143 return 144 default: 145 containers, err := r.runningContainers() 146 if err != nil { 147 c <- err 148 return 149 } 150 if len(containers) == 0 { 151 c <- nil 152 return 153 } 154 logger.Warningf("Waiting for containers to shutdown: %v", containers) 155 time.Sleep(1 * time.Second) 156 } 157 } 158 }() 159 160 select { 161 case <-time.After(timeout): 162 // TODO(fwereade): 2016-03-17 lp:1558657 163 // Containers are still up after timeout. C'est la vie 164 quit <- true 165 return errors.New("Timeout reached waiting for containers to shutdown") 166 case err := <-c: 167 return errors.Trace(err) 168 } 169 }