github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/taskrunner/plugin_supervisor_hook.go

package taskrunner

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"time"

	hclog "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	ti "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
	"github.com/hashicorp/nomad/client/dynamicplugins"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
	"github.com/hashicorp/nomad/plugins/drivers"
)

// csiPluginSupervisorHook manages supervising plugins that are running as Nomad
// tasks. These plugins will be fingerprinted and it will manage connecting them
// to their requisite plugin manager.
//
// It provides a couple of things to a task running inside Nomad. These are:
// * A mount to the `plugin_mount_dir`, which Nomad will then use to
//   connect to the nested plugin and handle volume mounts.
// * When the task has started, a loop that attempts to connect to the
//   plugin and performs initial fingerprinting of the plugin's capabilities
//   before notifying the plugin manager of the plugin.
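//
// As a rough illustration (values hypothetical), a task that exposes a CSI
// plugin declares a csi_plugin block in its jobspec, which populates the
// task's CSIPluginConfig used throughout this hook:
//
//   csi_plugin {
//     id        = "aws-ebs0"   // CSIPluginConfig.ID
//     type      = "monolith"   // CSIPluginConfig.Type
//     mount_dir = "/csi"       // CSIPluginConfig.MountDir
//   }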
type csiPluginSupervisorHook struct {
	logger     hclog.Logger
	alloc      *structs.Allocation
	task       *structs.Task
	runner     *TaskRunner
	mountPoint string

	// eventEmitter is used to emit events to the task
	eventEmitter ti.EventEmitter

	shutdownCtx      context.Context
	shutdownCancelFn context.CancelFunc

	running     bool
	runningLock sync.Mutex

	// previousHealthState is used by the supervisor goroutine to track historic
	// health states for gating task events.
	previousHealthState bool
}

// The plugin supervisor uses the PrestartHook mechanism to set up the requisite
// mount points and configuration for the task that exposes a CSI plugin.
var _ interfaces.TaskPrestartHook = &csiPluginSupervisorHook{}

// The plugin supervisor uses the PoststartHook mechanism to start polling the
// plugin for readiness and supported functionality before registering the
// plugin with the catalog.
var _ interfaces.TaskPoststartHook = &csiPluginSupervisorHook{}

// The plugin supervisor uses the StopHook mechanism to deregister the plugin
// with the catalog and to ensure any mounts are cleaned up.
var _ interfaces.TaskStopHook = &csiPluginSupervisorHook{}

func newCSIPluginSupervisorHook(csiRootDir string, eventEmitter ti.EventEmitter, runner *TaskRunner, logger hclog.Logger) *csiPluginSupervisorHook {
	task := runner.Task()

	// The Plugin directory will look something like this:
	// .
	// ..
	// csi.sock - A unix domain socket used to communicate with the CSI Plugin
	// staging/
	//  {volume-id}/{usage-mode-hash}/ - Intermediary mount point that will be used by plugins that support NODE_STAGE_UNSTAGE capabilities.
	// per-alloc/
	//  {alloc-id}/{volume-id}/{usage-mode-hash}/ - Mount point that will be bind-mounted into tasks that utilise the volume
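	//
	// For example (hypothetical plugin ID), a plugin of type "node" with ID
	// "aws-ebs0" would get the plugin root <csiRootDir>/node/aws-ebs0, and its
	// socket would be expected at <csiRootDir>/node/aws-ebs0/csi.sock.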
	pluginRoot := filepath.Join(csiRootDir, string(task.CSIPluginConfig.Type), task.CSIPluginConfig.ID)

	shutdownCtx, cancelFn := context.WithCancel(context.Background())

	hook := &csiPluginSupervisorHook{
		alloc:            runner.Alloc(),
		runner:           runner,
		logger:           logger,
		task:             task,
		mountPoint:       pluginRoot,
		shutdownCtx:      shutdownCtx,
		shutdownCancelFn: cancelFn,
		eventEmitter:     eventEmitter,
	}

	return hook
}

func (*csiPluginSupervisorHook) Name() string {
	return "csi_plugin_supervisor"
}

// Prestart is called before the task is started, including after every
// restart. This requires that the mount paths for a plugin be idempotent,
// despite us not knowing the name of the plugin ahead of time.
// Because of this, we use the plugin's type and ID as the unique identifier
// for a plugin on the filesystem.
func (h *csiPluginSupervisorHook) Prestart(ctx context.Context,
	req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
	// Create the mount directory that the container will access if it doesn't
	// already exist. Default to only nomad user access.
	if err := os.MkdirAll(h.mountPoint, 0700); err != nil && !os.IsExist(err) {
		return fmt.Errorf("failed to create mount point: %v", err)
	}

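	// The plugin directory is mounted into the task read-write with
	// bidirectional mount propagation so that mounts the plugin creates inside
	// the container (the staging/ and per-alloc/ paths described in
	// newCSIPluginSupervisorHook) propagate back to the host, where Nomad can
	// bind-mount them into the tasks that use the volumes. The /dev mount gives
	// the plugin access to host devices.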
	configMount := &drivers.MountConfig{
		TaskPath:        h.task.CSIPluginConfig.MountDir,
		HostPath:        h.mountPoint,
		Readonly:        false,
		PropagationMode: "bidirectional",
	}
	devMount := &drivers.MountConfig{
		TaskPath: "/dev",
		HostPath: "/dev",
		Readonly: false,
	}

	mounts := ensureMountpointInserted(h.runner.hookResources.getMounts(), configMount)
	mounts = ensureMountpointInserted(mounts, devMount)

	h.runner.hookResources.setMounts(mounts)

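	// Setting Done indicates this prestart hook has completed its work and does
	// not need to run again on subsequent task restarts.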
	resp.Done = true
	return nil
}

// Poststart is called after the task has started. Poststart is not
// called if the allocation is terminal.
//
// The context is cancelled if the task is killed.
func (h *csiPluginSupervisorHook) Poststart(_ context.Context, _ *interfaces.TaskPoststartRequest, _ *interfaces.TaskPoststartResponse) error {
	// If we're already running the supervisor routine, then we don't need to
	// try to restart it here, as it only terminates when the `Stop` hook runs.
	h.runningLock.Lock()
	if h.running {
		h.runningLock.Unlock()
		return nil
	}
	h.runningLock.Unlock()

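	// ensureSupervisorLoop re-checks h.running under the lock before doing any
	// work, so starting the goroutine here is harmless even if it races with an
	// already-running supervisor.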
	go h.ensureSupervisorLoop(h.shutdownCtx)
	return nil
}

// ensureSupervisorLoop should be called in a goroutine. It will terminate when
// the passed-in context is terminated.
//
// The supervisor works by:
// - Initially waiting for the plugin to become available. This loop is expensive
//   and may do things like create new gRPC clients on every iteration.
// - After receiving an initial healthy status, it will inform the plugin catalog
//   of the plugin, registering it with the plugin's fingerprinted capabilities.
// - We then perform a more lightweight check, simply probing the plugin on a less
//   frequent interval to ensure it is still alive, emitting task events when this
//   status changes.
//
// Deeper fingerprinting of the plugin is implemented by the csimanager.
func (h *csiPluginSupervisorHook) ensureSupervisorLoop(ctx context.Context) {
	h.runningLock.Lock()
	if h.running {
		h.runningLock.Unlock()
		return
	}
	h.running = true
	h.runningLock.Unlock()

	defer func() {
		h.runningLock.Lock()
		h.running = false
		h.runningLock.Unlock()
	}()

	socketPath := filepath.Join(h.mountPoint, structs.CSISocketName)
	t := time.NewTimer(0)

	// Step 1: Wait for the plugin to initially become available.
WAITFORREADY:
	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			pluginHealthy, err := h.supervisorLoopOnce(ctx, socketPath)
			if err != nil || !pluginHealthy {
				h.logger.Debug("CSI Plugin not ready", "error", err)

				// The plugin is not yet reporting healthy. Because we want to
				// optimise for quickly bringing a plugin online, we use a short
				// retry interval here.
				// TODO(dani): Test with more plugins and adjust.
				t.Reset(5 * time.Second)
				continue
			}

			// Mark the plugin as healthy in a task event
			h.previousHealthState = pluginHealthy
			event := structs.NewTaskEvent(structs.TaskPluginHealthy)
			event.SetMessage(fmt.Sprintf("plugin: %s", h.task.CSIPluginConfig.ID))
			h.eventEmitter.EmitEvent(event)

			break WAITFORREADY
		}
	}

	// Step 2: Register the plugin with the catalog.
	deregisterPluginFn, err := h.registerPlugin(socketPath)
	if err != nil {
		h.logger.Error("CSI Plugin registration failed", "error", err)
		event := structs.NewTaskEvent(structs.TaskPluginUnhealthy)
		event.SetMessage(fmt.Sprintf("failed to register plugin: %s, reason: %v", h.task.CSIPluginConfig.ID, err))
		h.eventEmitter.EmitEvent(event)
	}

	// Step 3: Start the lightweight supervisor loop.
	t.Reset(0)
	for {
		select {
		case <-ctx.Done():
			// Deregister the plugin on task shutdown. Registration may have
			// failed above, in which case there is nothing to deregister.
			if deregisterPluginFn != nil {
				deregisterPluginFn()
			}
			return
		case <-t.C:
			pluginHealthy, err := h.supervisorLoopOnce(ctx, socketPath)
			if err != nil {
				h.logger.Error("CSI Plugin fingerprinting failed", "error", err)
			}

			// The plugin has transitioned to a healthy state. Emit an event.
			if !h.previousHealthState && pluginHealthy {
				event := structs.NewTaskEvent(structs.TaskPluginHealthy)
				event.SetMessage(fmt.Sprintf("plugin: %s", h.task.CSIPluginConfig.ID))
				h.eventEmitter.EmitEvent(event)
			}

			// The plugin has transitioned to an unhealthy state. Emit an event.
			if h.previousHealthState && !pluginHealthy {
				event := structs.NewTaskEvent(structs.TaskPluginUnhealthy)
				if err != nil {
					event.SetMessage(fmt.Sprintf("error: %v", err))
				} else {
					event.SetMessage("Unknown Reason")
				}
				h.eventEmitter.EmitEvent(event)
			}

			h.previousHealthState = pluginHealthy

			// This loop is informational and in some plugins this may be expensive to
			// validate. We use a longer interval (30s) to avoid causing undue work.
			t.Reset(30 * time.Second)
		}
	}
}

func (h *csiPluginSupervisorHook) registerPlugin(socketPath string) (func(), error) {

	// At this point we know the plugin is ready and we can fingerprint it
	// to get its vendor name and version
	client, err := csi.NewClient(socketPath, h.logger.Named("csi_client").With("plugin.name", h.task.CSIPluginConfig.ID, "plugin.type", h.task.CSIPluginConfig.Type))
	if err != nil {
		return nil, fmt.Errorf("failed to create csi client: %v", err)
	}
	// Only defer Close after the error check: deferring it on a nil client
	// would panic when the deferred call runs.
	defer client.Close()

	info, err := client.PluginInfo()
	if err != nil {
		return nil, fmt.Errorf("failed to probe plugin: %v", err)
	}

	mkInfoFn := func(pluginType string) *dynamicplugins.PluginInfo {
		return &dynamicplugins.PluginInfo{
			Type:    pluginType,
			Name:    h.task.CSIPluginConfig.ID,
			Version: info.PluginVersion,
			ConnectionInfo: &dynamicplugins.PluginConnectionInfo{
				SocketPath: socketPath,
			},
			AllocID: h.alloc.ID,
			Options: map[string]string{
				"Provider":            info.Name, // vendor name
				"MountPoint":          h.mountPoint,
				"ContainerMountPoint": h.task.CSIPluginConfig.MountDir,
			},
		}
	}

	registrations := []*dynamicplugins.PluginInfo{}

	switch h.task.CSIPluginConfig.Type {
	case structs.CSIPluginTypeController:
		registrations = append(registrations, mkInfoFn(dynamicplugins.PluginTypeCSIController))
	case structs.CSIPluginTypeNode:
		registrations = append(registrations, mkInfoFn(dynamicplugins.PluginTypeCSINode))
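	// A monolith plugin exposes both the controller and node services over the
	// same socket, so it is registered in the catalog under both plugin types.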
	case structs.CSIPluginTypeMonolith:
		registrations = append(registrations, mkInfoFn(dynamicplugins.PluginTypeCSIController))
		registrations = append(registrations, mkInfoFn(dynamicplugins.PluginTypeCSINode))
	}

	deregistrationFns := []func(){}

	for _, reg := range registrations {
		if err := h.runner.dynamicRegistry.RegisterPlugin(reg); err != nil {
			for _, fn := range deregistrationFns {
				fn()
			}
			return nil, err
		}

		// need to rebind these so that each deregistration function
		// closes over its own registration
		rname := reg.Name
		rtype := reg.Type
		deregistrationFns = append(deregistrationFns, func() {
			err := h.runner.dynamicRegistry.DeregisterPlugin(rtype, rname)
			if err != nil {
				h.logger.Error("failed to deregister csi plugin", "name", rname, "type", rtype, "error", err)
			}
		})
	}

	return func() {
		for _, fn := range deregistrationFns {
			fn()
		}
	}, nil
}

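// supervisorLoopOnce performs a single health check of the plugin: it verifies
// the socket exists, dials the plugin over it, and issues a probe, returning
// the health status the plugin reports.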
func (h *csiPluginSupervisorHook) supervisorLoopOnce(ctx context.Context, socketPath string) (bool, error) {
	_, err := os.Stat(socketPath)
	if err != nil {
		return false, fmt.Errorf("failed to stat socket: %v", err)
	}

	client, err := csi.NewClient(socketPath, h.logger.Named("csi_client").With("plugin.name", h.task.CSIPluginConfig.ID, "plugin.type", h.task.CSIPluginConfig.Type))
	if err != nil {
		return false, fmt.Errorf("failed to create csi client: %v", err)
	}
	// Only defer Close after the error check; closing a nil client would panic.
	defer client.Close()

	healthy, err := client.PluginProbe(ctx)
	if err != nil {
		return false, fmt.Errorf("failed to probe plugin: %v", err)
	}

	return healthy, nil
}

// Stop is called after the task has exited and will not be started
// again. It is the only hook guaranteed to be executed whenever
// TaskRunner.Run is called (unless the client is gracefully shutting down).
// Therefore it may be called even when prestart and the other hooks
// have not.
//
// Stop hooks must be idempotent. The context is cancelled prematurely if the
// task is killed.
func (h *csiPluginSupervisorHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error {
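	// Cancelling the shutdown context stops the supervisor goroutine, which
	// deregisters the plugin from the dynamic plugin registry on its way out.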
	h.shutdownCancelFn()
	return nil
}

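// ensureMountpointInserted returns the given mounts with mount appended,
// unless an equal mount is already present.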
func ensureMountpointInserted(mounts []*drivers.MountConfig, mount *drivers.MountConfig) []*drivers.MountConfig {
	for _, mnt := range mounts {
		if mnt.IsEqual(mount) {
			return mounts
		}
	}

	mounts = append(mounts, mount)
	return mounts
}