github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/cmd/jujud/reboot/reboot.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Copyright 2014 Cloudbase Solutions SRL 3 // Licensed under the AGPLv3, see LICENCE file for details. 4 5 package reboot 6 7 import ( 8 "os" 9 "os/exec" 10 "strings" 11 "time" 12 13 "github.com/juju/errors" 14 "github.com/juju/loggo" 15 "github.com/juju/names/v5" 16 17 "github.com/juju/juju/agent" 18 "github.com/juju/juju/container" 19 "github.com/juju/juju/core/instance" 20 "github.com/juju/juju/environs/instances" 21 "github.com/juju/juju/rpc/params" 22 ) 23 24 var logger = loggo.GetLogger("juju.cmd.jujud.reboot") 25 var timeout = 10 * time.Minute 26 var rebootAfter = 15 27 28 func runCommand(args []string) error { 29 err := exec.Command(args[0], args[1:]...).Run() 30 return errors.Trace(err) 31 } 32 33 var tmpFile = func() (*os.File, error) { 34 f, err := os.CreateTemp(os.TempDir(), "juju-reboot") 35 return f, errors.Trace(err) 36 } 37 38 // Reboot implements the ExecuteReboot command which will reboot a machine 39 // once all containers have shut down, or a timeout is reached 40 type Reboot struct { 41 acfg AgentConfig 42 reboot RebootWaiter 43 } 44 45 func NewRebootWaiter(acfg agent.Config) (*Reboot, error) { 46 // ensure we're only running on a machine agent. 47 if _, ok := acfg.Tag().(names.MachineTag); !ok { 48 return nil, errors.Errorf("Expected names.MachineTag, got: %T --> %v", acfg.Tag(), acfg.Tag()) 49 } 50 return &Reboot{ 51 acfg: &agentConfigShim{aCfg: acfg}, 52 reboot: rebootWaiterShim{}, 53 }, nil 54 } 55 56 // ExecuteReboot will wait for all running containers to stop, and then execute 57 // a shutdown or a reboot (based on the action param) 58 func (r *Reboot) ExecuteReboot(action params.RebootAction) error { 59 if err := r.waitForContainersOrTimeout(); err != nil { 60 return errors.Trace(err) 61 } 62 63 // Stop all units before issuing a reboot. During a reboot, the machine agent 64 // will attempt to hold the execution lock until the reboot happens. However, 65 // since the old file based locking method has been replaced with sockets, if 66 // the machine agent is killed by the init system during shutdown, before the 67 // unit agents, the lock is released and unit agents start running hooks. 68 // When they in turn are killed, the hook is thrown into error state. If 69 // automatic retries are disabled, the hook remains in error state. 70 if err := r.stopDeployedUnits(); err != nil { 71 return errors.Trace(err) 72 } 73 74 if err := r.reboot.ScheduleAction(action, rebootAfter); err != nil { 75 return errors.Trace(err) 76 } 77 78 return nil 79 } 80 81 func (r *Reboot) stopDeployedUnits() error { 82 services, err := r.reboot.ListServices() 83 if err != nil { 84 return err 85 } 86 for _, svcName := range services { 87 if strings.HasPrefix(svcName, `jujud-unit-`) { 88 svc, err := r.reboot.NewServiceReference(svcName) 89 if err != nil { 90 return err 91 } 92 logger.Debugf("Stopping unit agent: %q", svcName) 93 if err = svc.Stop(); err != nil { 94 return err 95 } 96 } 97 } 98 return nil 99 } 100 101 func (r *Reboot) runningContainers() ([]instances.Instance, error) { 102 var runningInstances []instances.Instance 103 modelUUID := r.acfg.Model().Id() 104 for _, val := range instance.ContainerTypes { 105 managerConfig := container.ManagerConfig{ 106 container.ConfigModelUUID: modelUUID, 107 } 108 cfg := managerConfig 109 manager, err := r.reboot.NewContainerManager(val, cfg) 110 if err != nil { 111 return nil, errors.Annotatef(err, "failed to get manager for container type %v", val) 112 } 113 if !manager.IsInitialized() { 114 logger.Infof("container type %q not supported", val) 115 continue 116 } 117 containers, err := manager.ListContainers() 118 if err != nil { 119 return nil, errors.Annotate(err, "failed to list containers") 120 } 121 runningInstances = append(runningInstances, containers...) 122 } 123 return runningInstances, nil 124 } 125 126 func (r *Reboot) waitForContainersOrTimeout() error { 127 c := make(chan error, 1) 128 quit := make(chan bool, 1) 129 go func() { 130 for { 131 select { 132 case <-quit: 133 c <- nil 134 return 135 default: 136 containers, err := r.runningContainers() 137 if err != nil { 138 c <- err 139 return 140 } 141 if len(containers) == 0 { 142 c <- nil 143 return 144 } 145 logger.Warningf("Waiting for containers to shutdown: %v", containers) 146 time.Sleep(1 * time.Second) 147 } 148 } 149 }() 150 151 select { 152 case <-time.After(timeout): 153 // TODO(fwereade): 2016-03-17 lp:1558657 154 // Containers are still up after timeout. C'est la vie 155 quit <- true 156 return errors.New("Timeout reached waiting for containers to shutdown") 157 case err := <-c: 158 return errors.Trace(err) 159 } 160 }