github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/instancemutater/mutater.go (about)

     1  // Copyright 2019 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package instancemutater
     5  
     6  import (
     7  	"fmt"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/juju/clock"
    13  	"github.com/juju/collections/set"
    14  	"github.com/juju/errors"
    15  	"github.com/juju/names/v5"
    16  	"github.com/juju/worker/v3"
    17  
    18  	"github.com/juju/juju/api/agent/instancemutater"
    19  	"github.com/juju/juju/core/instance"
    20  	"github.com/juju/juju/core/life"
    21  	"github.com/juju/juju/core/lxdprofile"
    22  	"github.com/juju/juju/core/status"
    23  	"github.com/juju/juju/core/watcher"
    24  	"github.com/juju/juju/environs"
    25  	"github.com/juju/juju/rpc/params"
    26  	"github.com/juju/juju/wrench"
    27  )
    28  
    29  //go:generate go run go.uber.org/mock/mockgen -package mocks -destination mocks/mutatercontext_mock.go github.com/juju/juju/worker/instancemutater MutaterContext
    30  
    31  // lifetimeContext was extracted to allow the various Context clients to get
    32  // the benefits of the catacomb encapsulating everything that should happen
    33  // here. A clean implementation would almost certainly not need this.
// lifetimeContext was extracted to allow the various Context clients to get
// the benefits of the catacomb encapsulating everything that should happen
// here. A clean implementation would almost certainly not need this.
type lifetimeContext interface {
	// KillWithError kills the enclosing worker with the supplied error.
	KillWithError(error)

	// add registers the given worker so its lifetime is tied to the
	// enclosing worker's (presumably catacomb-managed) lifetime.
	add(worker.Worker) error
	// dying returns a channel that is closed when the enclosing worker
	// starts shutting down.
	dying() <-chan struct{}
	// errDying returns the error the enclosing worker is dying with.
	errDying() error
}
    41  
// MachineContext extends lifetimeContext with access to the LXD-profile
// capable broker and the set of profiles required for a model's machines.
type MachineContext interface {
	lifetimeContext
	// getBroker returns the broker used to query and assign LXD profiles.
	getBroker() environs.LXDProfiler
	// getRequiredLXDProfiles returns the profile names every machine in
	// the named model is expected to carry (see processMachineProfileChanges).
	getRequiredLXDProfiles(string) []string
}
    47  
// MutaterMachine bundles everything needed to watch and mutate the LXD
// profiles of a single machine.
type MutaterMachine struct {
	context    MachineContext                 // lifetime management and broker access
	logger     Logger                         // logger shared with the parent worker
	machineApi instancemutater.MutaterMachine // API facade for this machine
	id         string                         // machine id, used in log/status messages
}
    54  
// MutaterContext is the context used by the central mutater loop: it can
// mint per-machine contexts and resolve machine tags to API facades.
type MutaterContext interface {
	MachineContext
	// newMachineContext returns a MachineContext for a single machine's
	// goroutine.
	newMachineContext() MachineContext
	// getMachine returns the API facade for the machine with the given tag.
	getMachine(tag names.MachineTag) (instancemutater.MutaterMachine, error)
}
    60  
// mutater tracks one goroutine per watched machine, keyed by machine tag.
type mutater struct {
	context MutaterContext
	logger  Logger
	wg      *sync.WaitGroup
	// machines maps each watched machine's tag to the channel used to
	// notify that machine's goroutine of the machine's removal.
	machines map[names.MachineTag]chan struct{}
	// machineDead receives machines whose goroutines have exited.
	machineDead chan instancemutater.MutaterMachine
}
    68  
// startMachines ensures a running goroutine exists for each supplied machine
// tag. The first time a tag is seen, watchers are set up and runMachine is
// started; seeing a tag again signals the existing goroutine that the machine
// has been removed. KVM containers and machines that do not support profile
// watching (manual machines) are skipped. Any error returned kills the worker.
func (m *mutater) startMachines(tags []names.MachineTag) error {
	for _, tag := range tags {
		select {
		case <-m.context.dying():
			return m.context.errDying()
		default:
		}
		m.logger.Tracef("received tag %q", tag.String())
		if ch := m.machines[tag]; ch == nil {
			// First time we receive the tag, setup watchers.
			api, err := m.context.getMachine(tag)
			if err != nil {
				return errors.Trace(err)
			}
			id := api.Tag().Id()

			// Ensure we do not watch any KVM containers.
			containerType, err := api.ContainerType()
			if err != nil {
				return errors.Trace(err)
			}
			if containerType == instance.KVM {
				m.logger.Tracef("ignoring KVM container machine-%s", id)
				continue
			}

			profileChangeWatcher, err := api.WatchLXDProfileVerificationNeeded()
			if err != nil {
				// NotSupported indicates a machine whose profiles cannot
				// be watched (e.g. a manual machine); skip it quietly.
				if errors.IsNotSupported(err) {
					m.logger.Tracef("ignoring manual machine-%s", id)
					continue
				}
				return errors.Annotatef(err, "failed to start watching application lxd profiles for machine-%s", id)
			}

			ch = make(chan struct{})
			m.machines[tag] = ch

			machine := MutaterMachine{
				context:    m.context.newMachineContext(),
				logger:     m.logger,
				machineApi: api,
				id:         id,
			}

			// The goroutine is counted on wg so the worker can wait for
			// all machine goroutines to finish on shutdown.
			m.wg.Add(1)
			go runMachine(machine, profileChangeWatcher, ch, m.machineDead, func() { m.wg.Done() })
		} else {
			// We've received this tag before, therefore
			// the machine has been removed from the model
			// cache and no longer needed
			ch <- struct{}{}
		}
	}
	return nil
}
   125  
// runMachine is the body of a single machine's goroutine. It ties the
// profile-change watcher to the worker's lifetime and runs the machine's
// watch loop until it returns or the worker dies. On exit it reports the
// machine on died, draining removed in the meantime so the central loop
// can never block trying to notify this goroutine.
func runMachine(
	machine MutaterMachine,
	profileChangeWatcher watcher.NotifyWatcher,
	removed <-chan struct{}, died chan<- instancemutater.MutaterMachine, cleanup func(),
) {
	defer cleanup()
	defer func() {
		// We can't just send on the dead channel because the
		// central loop might be trying to write to us on the
		// removed channel.
		for {
			select {
			case <-machine.context.dying():
				return
			case died <- machine.machineApi:
				return
			case <-removed:
				// Discard: we are already on our way out.
			}
		}
	}()

	// Register the watcher so it is stopped when the worker dies.
	if err := machine.context.add(profileChangeWatcher); err != nil {
		machine.context.KillWithError(err)
		return
	}
	if err := machine.watchProfileChangesLoop(removed, profileChangeWatcher); err != nil {
		machine.context.KillWithError(err)
	}
}
   155  
// watchProfileChangesLoop reacts to profile-change notifications and removal
// signals for this machine until the worker dies or the machine is Dead.
// Any error returned will cause the worker to restart.
func (m MutaterMachine) watchProfileChangesLoop(removed <-chan struct{}, profileChangeWatcher watcher.NotifyWatcher) error {
	m.logger.Tracef("watching change on MutaterMachine %s", m.id)
	for {
		select {
		case <-m.context.dying():
			return m.context.errDying()
		case <-profileChangeWatcher.Changes():
			info, err := m.machineApi.CharmProfilingInfo()
			if err != nil {
				// If the machine is not provisioned then we need to wait for
				// new changes from the watcher.
				if params.IsCodeNotProvisioned(errors.Cause(err)) {
					m.logger.Tracef("got not provisioned machine-%s on charm profiling info, wait for another change", m.id)
					continue
				}
				return errors.Trace(err)
			}
			if err = m.processMachineProfileChanges(info); err != nil && errors.IsNotValid(err) {
				// Return to stop mutating the machine, but no need to restart
				// the worker.
				return nil
			} else if err != nil {
				return errors.Trace(err)
			}
		case <-removed:
			// The central loop signalled that the machine was removed
			// from the model cache; only exit once it is actually Dead.
			if err := m.machineApi.Refresh(); err != nil {
				return errors.Trace(err)
			}
			if m.machineApi.Life() == life.Dead {
				return nil
			}
		}
	}
}
   191  
   192  func (m MutaterMachine) processMachineProfileChanges(info *instancemutater.UnitProfileInfo) error {
   193  	if info == nil || (len(info.CurrentProfiles) == 0 && len(info.ProfileChanges) == 0) {
   194  		// no changes to be made, return now.
   195  		return nil
   196  	}
   197  
   198  	if err := m.machineApi.Refresh(); err != nil {
   199  		return err
   200  	}
   201  	if m.machineApi.Life() == life.Dead {
   202  		return errors.NotValidf("machine %q", m.id)
   203  	}
   204  
   205  	// Set the modification status to idle, that way we have a baseline for
   206  	// future changes.
   207  	if err := m.machineApi.SetModificationStatus(status.Idle, "", nil); err != nil {
   208  		return errors.Annotatef(err, "cannot set status for machine %q modification status", m.id)
   209  	}
   210  
   211  	report := func(retErr error) error {
   212  		if retErr != nil {
   213  			m.logger.Errorf("cannot upgrade machine-%s lxd profiles: %s", m.id, retErr.Error())
   214  			if err := m.machineApi.SetModificationStatus(status.Error, fmt.Sprintf("cannot upgrade machine's lxd profile: %s", retErr.Error()), nil); err != nil {
   215  				m.logger.Errorf("cannot set modification status of machine %q error: %v", m.id, err)
   216  			}
   217  		} else {
   218  			if err := m.machineApi.SetModificationStatus(status.Applied, "", nil); err != nil {
   219  				m.logger.Errorf("cannot reset modification status of machine %q applied: %v", m.id, err)
   220  			}
   221  		}
   222  		return retErr
   223  	}
   224  
   225  	// Convert info.ProfileChanges into a struct which can be used to
   226  	// add or remove profiles from a machine.  Use it to create a list
   227  	// of expected profiles.
   228  	post, err := m.gatherProfileData(info)
   229  	if err != nil {
   230  		return report(errors.Annotatef(err, "%s", m.id))
   231  	}
   232  
   233  	expectedProfiles := m.context.getRequiredLXDProfiles(info.ModelName)
   234  	for _, p := range post {
   235  		if p.Profile != nil {
   236  			expectedProfiles = append(expectedProfiles, p.Name)
   237  		}
   238  	}
   239  
   240  	verified, err := m.verifyCurrentProfiles(string(info.InstanceId), expectedProfiles)
   241  	if err != nil {
   242  		return report(errors.Annotatef(err, "%s", m.id))
   243  	}
   244  	if verified {
   245  		m.logger.Infof("no changes necessary to machine-%s lxd profiles (%v)", m.id, expectedProfiles)
   246  		return report(nil)
   247  	}
   248  
   249  	// Adding a wrench to test charm not running hooks before profile can be applied.
   250  	// Do not bother for the default or model profile.  We're not interested in non
   251  	// charm profiles.
   252  	if wrench.IsActive("instance-mutater", "disable-apply-lxdprofile") && len(expectedProfiles) > 1 {
   253  		m.logger.Warningf("waiting 3 minutes to apply lxd profiles %q due to wrench in the works", strings.Join(expectedProfiles, ", "))
   254  		select {
   255  		case <-clock.WallClock.After(3 * time.Minute):
   256  			m.logger.Warningf("continue with apply lxd profiles")
   257  		}
   258  	}
   259  
   260  	m.logger.Infof("machine-%s (%s) assign lxd profiles %q, %#v", m.id, string(info.InstanceId), expectedProfiles, post)
   261  	broker := m.context.getBroker()
   262  	currentProfiles, err := broker.AssignLXDProfiles(string(info.InstanceId), expectedProfiles, post)
   263  	if err != nil {
   264  		m.logger.Errorf("failure to assign lxd profiles %s to machine-%s: %s", expectedProfiles, m.id, err)
   265  		return report(err)
   266  	}
   267  
   268  	return report(m.machineApi.SetCharmProfiles(currentProfiles))
   269  }
   270  
   271  func (m MutaterMachine) gatherProfileData(info *instancemutater.UnitProfileInfo) ([]lxdprofile.ProfilePost, error) {
   272  	var result []lxdprofile.ProfilePost
   273  	for _, pu := range info.ProfileChanges {
   274  		oldName, err := lxdprofile.MatchProfileNameByAppName(info.CurrentProfiles, pu.ApplicationName)
   275  		if err != nil {
   276  			return nil, err
   277  		}
   278  		if pu.Profile.Empty() && oldName == "" {
   279  			// There is no new Profile and no Profile for this application applied
   280  			// already, move on.  A charm without an lxd profile.
   281  			continue
   282  		}
   283  		name := lxdprofile.Name(info.ModelName, pu.ApplicationName, pu.Revision)
   284  		if oldName != "" && name != oldName {
   285  			// add the old profile name to the result, so the profile can
   286  			// be deleted from the lxd server.
   287  			result = append(result, lxdprofile.ProfilePost{Name: oldName})
   288  		}
   289  		add := lxdprofile.ProfilePost{Name: name}
   290  		// should not happen, but you never know.
   291  		if !pu.Profile.Empty() {
   292  			// We make a copy since the loop var keeps the same pointer.
   293  			p := pu.Profile
   294  			add.Profile = &p
   295  		}
   296  		result = append(result, add)
   297  	}
   298  	return result, nil
   299  }
   300  
   301  func (m MutaterMachine) verifyCurrentProfiles(instID string, expectedProfiles []string) (bool, error) {
   302  	broker := m.context.getBroker()
   303  	obtainedProfiles, err := broker.LXDProfileNames(instID)
   304  	if err != nil {
   305  		return false, err
   306  	}
   307  	obtainedSet := set.NewStrings(obtainedProfiles...)
   308  	expectedSet := set.NewStrings(expectedProfiles...)
   309  
   310  	if obtainedSet.Union(expectedSet).Size() > obtainedSet.Size() {
   311  		return false, nil
   312  	}
   313  
   314  	if expectedSet.Union(obtainedSet).Size() > expectedSet.Size() {
   315  		return false, nil
   316  	}
   317  
   318  	return true, nil
   319  }