github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/cmd/jujud/reboot/reboot.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Copyright 2014 Cloudbase Solutions SRL
     3  // Licensed under the AGPLv3, see LICENCE file for details.
     4  
     5  package reboot
     6  
     7  import (
     8  	"io/ioutil"
     9  	"os"
    10  	"os/exec"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/juju/errors"
    15  	"github.com/juju/loggo"
    16  	"github.com/juju/os/series"
    17  	"gopkg.in/juju/names.v2"
    18  
    19  	"github.com/juju/juju/agent"
    20  	"github.com/juju/juju/apiserver/params"
    21  	"github.com/juju/juju/container"
    22  	"github.com/juju/juju/container/factory"
    23  	"github.com/juju/juju/core/instance"
    24  	"github.com/juju/juju/environs/instances"
    25  	"github.com/juju/juju/service"
    26  	"github.com/juju/juju/service/common"
    27  )
    28  
    29  var logger = loggo.GetLogger("juju.cmd.jujud.reboot")
    30  var timeout = 10 * time.Minute
    31  var rebootAfter = 15
    32  
    33  func runCommand(args []string) error {
    34  	err := exec.Command(args[0], args[1:]...).Run()
    35  	return errors.Trace(err)
    36  }
    37  
    38  var tmpFile = func() (*os.File, error) {
    39  	f, err := ioutil.TempFile(os.TempDir(), "juju-reboot")
    40  	return f, errors.Trace(err)
    41  }
    42  
// Reboot implements the ExecuteReboot command which will reboot a machine
// once all containers have shut down, or a timeout is reached.
type Reboot struct {
	// acfg is the agent configuration; its model ID is used when building
	// the container manager config in runningContainers.
	acfg agent.Config
	// tag is the machine tag taken from acfg by NewRebootWaiter.
	tag  names.MachineTag
}
    49  
    50  func NewRebootWaiter(acfg agent.Config) (*Reboot, error) {
    51  	tag, ok := acfg.Tag().(names.MachineTag)
    52  	if !ok {
    53  		return nil, errors.Errorf("Expected names.MachineTag, got: %T --> %v", acfg.Tag(), acfg.Tag())
    54  	}
    55  	return &Reboot{
    56  		acfg: acfg,
    57  		tag:  tag,
    58  	}, nil
    59  }
    60  
    61  // ExecuteReboot will wait for all running containers to stop, and then execute
    62  // a shutdown or a reboot (based on the action param)
    63  func (r *Reboot) ExecuteReboot(action params.RebootAction) error {
    64  	if err := r.waitForContainersOrTimeout(); err != nil {
    65  		return errors.Trace(err)
    66  	}
    67  
    68  	// Stop all units before issuing a reboot. During a reboot, the machine agent
    69  	// will attempt to hold the execution lock until the reboot happens. However, since
    70  	// the old file based locking method has been replaced with semaphores (Windows), and
    71  	// sockets (Linux), if the machine agent is killed by the init system during shutdown,
    72  	// before the unit agents, the lock is released and unit agents start running hooks.
    73  	// When they in turn are killed, the hook is thrown into error state. If automatic retries
    74  	// are disabled, the hook remains in error state.
    75  	if err := r.stopDeployedUnits(); err != nil {
    76  		return errors.Trace(err)
    77  	}
    78  
    79  	if err := scheduleAction(action, rebootAfter); err != nil {
    80  		return errors.Trace(err)
    81  	}
    82  
    83  	return nil
    84  }
    85  
    86  func (r *Reboot) stopDeployedUnits() error {
    87  	osVersion, err := series.HostSeries()
    88  	if err != nil {
    89  		return errors.Trace(err)
    90  	}
    91  	services, err := service.ListServices()
    92  	if err != nil {
    93  		return err
    94  	}
    95  	for _, svcName := range services {
    96  		if strings.HasPrefix(svcName, `jujud-unit-`) {
    97  			svc, err := service.NewService(svcName, common.Conf{}, osVersion)
    98  			if err != nil {
    99  				return err
   100  			}
   101  			logger.Debugf("Stopping unit agent: %q", svcName)
   102  			if err = svc.Stop(); err != nil {
   103  				return err
   104  			}
   105  		}
   106  	}
   107  	return nil
   108  }
   109  
   110  func (r *Reboot) runningContainers() ([]instances.Instance, error) {
   111  	var runningInstances []instances.Instance
   112  	modelUUID := r.acfg.Model().Id()
   113  	for _, val := range instance.ContainerTypes {
   114  		managerConfig := container.ManagerConfig{
   115  			container.ConfigModelUUID: modelUUID,
   116  		}
   117  		cfg := managerConfig
   118  		manager, err := factory.NewContainerManager(val, cfg)
   119  		if err != nil {
   120  			return nil, errors.Annotatef(err, "failed to get manager for container type %v", val)
   121  		}
   122  		if !manager.IsInitialized() {
   123  			logger.Infof("container type %q not supported", val)
   124  			continue
   125  		}
   126  		instances, err := manager.ListContainers()
   127  		if err != nil {
   128  			return nil, errors.Annotate(err, "failed to list containers")
   129  		}
   130  		runningInstances = append(runningInstances, instances...)
   131  	}
   132  	return runningInstances, nil
   133  }
   134  
   135  func (r *Reboot) waitForContainersOrTimeout() error {
   136  	c := make(chan error, 1)
   137  	quit := make(chan bool, 1)
   138  	go func() {
   139  		for {
   140  			select {
   141  			case <-quit:
   142  				c <- nil
   143  				return
   144  			default:
   145  				containers, err := r.runningContainers()
   146  				if err != nil {
   147  					c <- err
   148  					return
   149  				}
   150  				if len(containers) == 0 {
   151  					c <- nil
   152  					return
   153  				}
   154  				logger.Warningf("Waiting for containers to shutdown: %v", containers)
   155  				time.Sleep(1 * time.Second)
   156  			}
   157  		}
   158  	}()
   159  
   160  	select {
   161  	case <-time.After(timeout):
   162  		// TODO(fwereade): 2016-03-17 lp:1558657
   163  		// Containers are still up after timeout. C'est la vie
   164  		quit <- true
   165  		return errors.New("Timeout reached waiting for containers to shutdown")
   166  	case err := <-c:
   167  		return errors.Trace(err)
   168  	}
   169  }