github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/pluginmanager/csimanager/volume.go

package csimanager

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/mount"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
)

var _ VolumeMounter = &volumeManager{}

const (
	DefaultMountActionTimeout = 2 * time.Minute
	StagingDirName            = "staging"
	AllocSpecificDirName      = "per-alloc"
)

// volumeManager handles the state of attached volumes for a given CSI plugin.
//
// volumeManagers outlive the lifetime of a given allocation as volumes may be
// shared by multiple allocations on the same node.
//
// Volumes are tracked with an enriched volume usage struct, as the CSI spec
// requires slightly different handling depending on the usage (attachment and
// access) mode.
type volumeManager struct {
	logger  hclog.Logger
	eventer TriggerNodeEvent
	plugin  csi.CSIPlugin

	usageTracker *volumeUsageTracker

	// mountRoot is the root under which plugin directories and mounts may be
	// created, e.g. /opt/nomad.d/statedir/csi/my-csi-plugin/
	mountRoot string

	// containerMountPoint is the location _inside_ the plugin container that
	// the `mountRoot` is bound into.
	containerMountPoint string

	// requiresStaging indicates whether the plugin requires the volume manager
	// to call the NodeStageVolume and NodeUnstageVolume RPCs during setup and
	// teardown.
	requiresStaging bool
}

// newVolumeManager returns a volumeManager for the given plugin, rooted at
// rootDir on the host and at containerRootDir inside the plugin container.
func newVolumeManager(logger hclog.Logger, eventer TriggerNodeEvent, plugin csi.CSIPlugin, rootDir, containerRootDir string, requiresStaging bool) *volumeManager {
	return &volumeManager{
		logger:              logger.Named("volume_manager"),
		eventer:             eventer,
		plugin:              plugin,
		mountRoot:           rootDir,
		containerMountPoint: containerRootDir,
		requiresStaging:     requiresStaging,
		usageTracker:        newVolumeUsageTracker(),
	}
}
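
// A minimal construction sketch with hypothetical values (the paths and the
// staging flag below are illustrative only and are not taken from this
// repository; real callers derive them from the plugin's configuration and
// fingerprint rather than hard-coding them):
//
//	vm := newVolumeManager(logger, eventer, plugin,
//		"/opt/nomad.d/statedir/csi/my-csi-plugin", // mountRoot on the host
//		"/local/csi",                              // the same directory as seen inside the plugin container
//		true,                                      // plugin advertises STAGE_UNSTAGE_VOLUME
//	)
//	_ = vm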

func (v *volumeManager) stagingDirForVolume(root string, volID string, usage *UsageOptions) string {
	return filepath.Join(root, StagingDirName, volID, usage.ToFS())
}

func (v *volumeManager) allocDirForVolume(root string, volID, allocID string) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID)
}

func (v *volumeManager) targetForVolume(root string, volID, allocID string, usage *UsageOptions) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID, usage.ToFS())
}
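
// For illustration only (hypothetical IDs, and assuming a usage whose ToFS()
// renders as "rw-file-system-single-node-writer"), the helpers above lay out
// directories under mountRoot like this:
//
//	staging dir: <mountRoot>/staging/vol-1/rw-file-system-single-node-writer
//	alloc dir:   <mountRoot>/per-alloc/alloc-1/vol-1
//	target path: <mountRoot>/per-alloc/alloc-1/vol-1/rw-file-system-single-node-writer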

// ensureStagingDir attempts to create a directory for use when staging a volume
// and then checks whether that path is already a mount point, e.g. from an
// existing volume stage.
//
// Returns the staging path, whether it is a pre-existing mount point, and any
// error that occurred.
func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOptions) (string, bool, error) {
	stagingPath := v.stagingDirForVolume(v.mountRoot, vol.ID, usage)

	// Make the staging path, owned by the Nomad user
	if err := os.MkdirAll(stagingPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create staging directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(stagingPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return stagingPath, !isNotMount, nil
}

// ensureAllocDir attempts to create a directory for use when publishing a volume
// and then checks whether the target path is already a mount point (e.g. when
// reattaching to existing allocs).
//
// Returns the publish (target) path, whether it is a pre-existing mount point,
// and any error that occurred.
func (v *volumeManager) ensureAllocDir(vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (string, bool, error) {
	allocPath := v.allocDirForVolume(v.mountRoot, vol.ID, alloc.ID)

	// Make the alloc path, owned by the Nomad user
	if err := os.MkdirAll(allocPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create allocation directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that the target is not already a mount point
	targetPath := v.targetForVolume(v.mountRoot, vol.ID, alloc.ID, usage)

	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(targetPath)

	switch {
	case errors.Is(err, os.ErrNotExist):
		// ignore; the path does not exist and as such is not a mount point
	case err != nil:
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return targetPath, !isNotMount, nil
}

// volumeCapability builds the CSI VolumeCapability for this volume and usage,
// merging the volume's mount options with any per-request mount options.
func volumeCapability(vol *structs.CSIVolume, usage *UsageOptions) (*csi.VolumeCapability, error) {
	var opts *structs.CSIMountOptions
	if vol.MountOptions == nil {
		opts = usage.MountOptions
	} else {
		opts = vol.MountOptions.Copy()
		opts.Merge(usage.MountOptions)
	}

	capability, err := csi.VolumeCapabilityFromStructs(usage.AttachmentMode, usage.AccessMode, opts)
	if err != nil {
		return nil, err
	}

	return capability, nil
}
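
// As a rough illustration (hypothetical values, and assuming that
// structs.CSIMountOptions.Merge prefers the argument's non-empty fields), a
// volume-level fs_type combined with per-request mount flags would yield a
// capability carrying both:
//
//	vol.MountOptions = &structs.CSIMountOptions{FSType: "ext4"}
//	usage := &UsageOptions{
//		AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
//		AccessMode:     structs.CSIVolumeAccessModeSingleNodeWriter,
//		MountOptions:   &structs.CSIMountOptions{MountFlags: []string{"noatime"}},
//	}
//	capability, err := volumeCapability(vol, usage) // FSType "ext4", MountFlags ["noatime"]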

// stageVolume prepares a volume for use by allocations. When a plugin exposes
// the STAGE_UNSTAGE_VOLUME capability, this MUST be called once per volume and
// usage mode before the volume can be published via NodePublishVolume.
func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume, usage *UsageOptions, publishContext map[string]string) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Preparing volume staging environment")
	hostStagingPath, isMount, err := v.ensureStagingDir(vol, usage)
	if err != nil {
		return err
	}
	pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)

	logger.Trace("Volume staging environment", "pre-existing_mount", isMount, "host_staging_path", hostStagingPath, "plugin_staging_path", pluginStagingPath)

	if isMount {
		logger.Debug("re-using existing staging mount for volume", "staging_path", hostStagingPath)
		return nil
	}

	capability, err := volumeCapability(vol, usage)
	if err != nil {
		return err
	}

	req := &csi.NodeStageVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		VolumeCapability:  capability,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	}

	// CSI NodeStageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeStageVolume(ctx, req,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

// publishVolume calls NodePublishVolume to make the volume available to a
// specific allocation and returns the resulting host mount path.
func (v *volumeManager) publishVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (*MountInfo, error) {
	logger := hclog.FromContext(ctx)
	var pluginStagingPath string
	if v.requiresStaging {
		pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
	}

	hostTargetPath, isMount, err := v.ensureAllocDir(vol, alloc, usage)
	if err != nil {
		return nil, err
	}
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage)

	if isMount {
		logger.Debug("Re-using existing published volume for allocation")
		return &MountInfo{Source: hostTargetPath}, nil
	}

	capabilities, err := volumeCapability(vol, usage)
	if err != nil {
		return nil, err
	}

	// CSI NodePublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	err = v.plugin.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		TargetPath:        pluginTargetPath,
		VolumeCapability:  capabilities,
		Readonly:          usage.ReadOnly,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	},
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	return &MountInfo{Source: hostTargetPath}, err
}

// MountVolume performs the steps required for using a given volume
// configuration for the provided allocation. It is passed the publishContext
// from the remote attachment and the specific usage modes from the CSI hook,
// and uses this state to stage and publish the volume as required for use by
// the given allocation.
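//
// A rough usage sketch (hypothetical caller; the volume, allocation, usage,
// and publish context here are assumed to come from the server and the CSI
// hook, not from this package):
//
//	usage := &UsageOptions{
//		ReadOnly:       false,
//		AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
//		AccessMode:     structs.CSIVolumeAccessModeSingleNodeWriter,
//	}
//	info, err := vm.MountVolume(ctx, vol, alloc, usage, publishContext)
//	if err != nil {
//		return err
//	}
//	// info.Source is the host path to bind-mount into the task; the matching
//	// cleanup call is vm.UnmountVolume(ctx, vol.ID, vol.RemoteID(), alloc.ID, usage).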
func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (mountInfo *MountInfo, err error) {
	logger := v.logger.With("volume_id", vol.ID, "alloc_id", alloc.ID)
	ctx = hclog.WithContext(ctx, logger)

	if v.requiresStaging {
		err = v.stageVolume(ctx, vol, usage, publishContext)
	}

	if err == nil {
		mountInfo, err = v.publishVolume(ctx, vol, alloc, usage, publishContext)
	}

	if err == nil {
		v.usageTracker.Claim(alloc.ID, vol.ID, usage)
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Mount volume").
		AddDetail("volume_id", vol.ID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return mountInfo, err
}

// unstageVolume is the inverse operation of `stageVolume` and must be called
// once for each staging path that a volume has been staged under.
// It is safe to call multiple times and a plugin is required to return OK if
// the volume has been unstaged or was never staged on the node.
func (v *volumeManager) unstageVolume(ctx context.Context, volID, remoteID string, usage *UsageOptions) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Unstaging volume")
	stagingPath := v.stagingDirForVolume(v.containerMountPoint, volID, usage)

	// CSI NodeUnstageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeUnstageVolume(ctx,
		remoteID,
		stagingPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

// combineErrors flattens the given errors into a single multierror, skipping
// nil values, and returns nil if no errors were passed.
func combineErrors(maybeErrs ...error) error {
	var result *multierror.Error
	for _, err := range maybeErrs {
		if err == nil {
			continue
		}

		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}

// unpublishVolume is the inverse operation of `publishVolume`: it calls
// NodeUnpublishVolume for the allocation's target path and then cleans up the
// corresponding host directory. RPC errors are wrapped as
// structs.ErrCSIClientRPCIgnorable when the host path is confirmed to be gone.
func (v *volumeManager) unpublishVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) error {
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, volID, allocID, usage)

	// CSI NodeUnpublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	rpcErr := v.plugin.NodeUnpublishVolume(ctx, remoteID, pluginTargetPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	hostTargetPath := v.targetForVolume(v.mountRoot, volID, allocID, usage)
	if _, err := os.Stat(hostTargetPath); os.IsNotExist(err) {
		if rpcErr != nil && strings.Contains(rpcErr.Error(), "no mount point") {
			// host target path was already destroyed, nothing to do here.
			// this helps us in the case that a previous GC attempt cleaned
			// up the volume on the node but the controller RPCs failed
			rpcErr = fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
		}
		return rpcErr
	}

	// The host target path was not cleaned up, so attempt to do so here. If it
	// is still a mount point then removing the directory will fail and we will
	// return both the rpcErr and the file error.
	rmErr := os.Remove(hostTargetPath)
	if rmErr != nil {
		return combineErrors(rpcErr, rmErr)
	}

	// We successfully removed the directory, so return any rpcErr that was
	// encountered, but wrap it as ignorable: since the directory could be
	// removed, the failure was probably transient or the mount was already
	// cleaned up externally.
	return fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
}

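// UnmountVolume unpublishes the volume for the given allocation and, when the
// plugin requires staging and no other claims for this usage remain, unstages
// it as well. Ignorable RPC errors are logged and treated as success, and a
// node event is emitted either way.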
func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) (err error) {
	logger := v.logger.With("volume_id", volID, "alloc_id", allocID)
	ctx = hclog.WithContext(ctx, logger)

	err = v.unpublishVolume(ctx, volID, remoteID, allocID, usage)

	if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		canRelease := v.usageTracker.Free(allocID, volID, usage)
		if v.requiresStaging && canRelease {
			err = v.unstageVolume(ctx, volID, remoteID, usage)
		}
	}

	if errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		logger.Trace("unmounting volume failed with ignorable error", "error", err)
		err = nil
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Unmount volume").
		AddDetail("volume_id", volID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return err
}