github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/pluginmanager/csimanager/volume.go

package csimanager

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/mount"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
)

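// Compile-time check that volumeManager implements the VolumeMounter interface.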
var _ VolumeMounter = &volumeManager{}

const (
	DefaultMountActionTimeout = 2 * time.Minute
	StagingDirName            = "staging"
	AllocSpecificDirName      = "per-alloc"
)

// volumeManager handles the state of attached volumes for a given CSI Plugin.
//
// volumeManagers outlive the lifetime of a given allocation as volumes may be
// shared by multiple allocations on the same node.
//
// Volumes are tracked in an enriched volume usage struct because the CSI Spec
// requires slightly different handling for each usage mode.
type volumeManager struct {
	logger  hclog.Logger
	eventer TriggerNodeEvent
	plugin  csi.CSIPlugin

	usageTracker *volumeUsageTracker

	// mountRoot is the root directory where plugin directories and mounts may
	// be created, e.g. /opt/nomad.d/statedir/csi/my-csi-plugin/
	mountRoot string

	// containerMountPoint is the location _inside_ the plugin container at
	// which the `mountRoot` is bind-mounted.
	containerMountPoint string

	// requiresStaging indicates whether the plugin requires the volume manager
	// to call the NodeStageVolume and NodeUnstageVolume RPCs during setup and
	// teardown
	requiresStaging bool
}

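// newVolumeManager returns a volumeManager for the given plugin. rootDir is
// the host directory under which staging and per-alloc mount paths are
// created, and containerRootDir is the path at which that same directory tree
// is visible from inside the plugin container.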
func newVolumeManager(logger hclog.Logger, eventer TriggerNodeEvent, plugin csi.CSIPlugin, rootDir, containerRootDir string, requiresStaging bool) *volumeManager {
	return &volumeManager{
		logger:              logger.Named("volume_manager"),
		eventer:             eventer,
		plugin:              plugin,
		mountRoot:           rootDir,
		containerMountPoint: containerRootDir,
		requiresStaging:     requiresStaging,
		usageTracker:        newVolumeUsageTracker(),
	}
}

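// stagingDirForVolume returns the staging path for a volume and usage mode
// under the given root, i.e. <root>/staging/<volume-id>/<usage>.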
func (v *volumeManager) stagingDirForVolume(root string, volID string, usage *UsageOptions) string {
	return filepath.Join(root, StagingDirName, volID, usage.ToFS())
}

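// allocDirForVolume returns the per-allocation directory for a volume under
// the given root, i.e. <root>/per-alloc/<alloc-id>/<volume-id>.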
func (v *volumeManager) allocDirForVolume(root string, volID, allocID string) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID)
}

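// targetForVolume returns the publish target path for a volume, allocation,
// and usage mode under the given root, i.e.
// <root>/per-alloc/<alloc-id>/<volume-id>/<usage>.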
func (v *volumeManager) targetForVolume(root string, volID, allocID string, usage *UsageOptions) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID, usage.ToFS())
}

// ensureStagingDir attempts to create a directory for use when staging a volume
// and then validates that the path is not already a mount point (e.g. for an
// existing volume stage).
//
// Returns the staging path, whether it is already a mount point, and any error
// that occurred.
func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOptions) (string, bool, error) {
	stagingPath := v.stagingDirForVolume(v.mountRoot, vol.ID, usage)

	// Make the staging path, owned by the Nomad user
	if err := os.MkdirAll(stagingPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create staging directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(stagingPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return stagingPath, !isNotMount, nil
}

// ensureAllocDir attempts to create a directory for use when publishing a volume
// and then validates that the path is not already a mount point (e.g. when
// reattaching to existing allocs).
//
// Returns the publish target path, whether it is already a mount point, and any
// error that occurred.
func (v *volumeManager) ensureAllocDir(vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (string, bool, error) {
	allocPath := v.allocDirForVolume(v.mountRoot, vol.ID, alloc.ID)

	// Make the alloc path, owned by the Nomad user
	if err := os.MkdirAll(allocPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create allocation directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that the target is not already a mount point
	targetPath := v.targetForVolume(v.mountRoot, vol.ID, alloc.ID, usage)
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(targetPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return targetPath, !isNotMount, nil
}

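// volumeCapability maps the volume's attachment and access modes to a CSI
// VolumeCapability, combining the volume's own mount options with any mount
// options carried in the usage options.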
func volumeCapability(vol *structs.CSIVolume, usage *UsageOptions) (*csi.VolumeCapability, error) {
	capability, err := csi.VolumeCapabilityFromStructs(vol.AttachmentMode, vol.AccessMode)
	if err != nil {
		return nil, err
	}

	var opts *structs.CSIMountOptions
	if vol.MountOptions == nil {
		opts = usage.MountOptions
	} else {
		opts = vol.MountOptions.Copy()
		opts.Merge(usage.MountOptions)
	}

	capability.MountVolume = opts

	return capability, nil
}

// stageVolume prepares a volume for use by allocations. When a plugin exposes
// the STAGE_UNSTAGE_VOLUME capability it MUST be called once per volume for a
// given usage mode before the volume can be NodePublish-ed.
func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume, usage *UsageOptions, publishContext map[string]string) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Preparing volume staging environment")
	hostStagingPath, isMount, err := v.ensureStagingDir(vol, usage)
	if err != nil {
		return err
	}
	pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)

	logger.Trace("Volume staging environment", "pre-existing_mount", isMount, "host_staging_path", hostStagingPath, "plugin_staging_path", pluginStagingPath)

	if isMount {
		logger.Debug("re-using existing staging mount for volume", "staging_path", hostStagingPath)
		return nil
	}

	capability, err := volumeCapability(vol, usage)
	if err != nil {
		return err
	}

	req := &csi.NodeStageVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		VolumeCapability:  capability,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	}

	// CSI NodeStageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeStageVolume(ctx, req,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

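// publishVolume makes a volume available to a specific allocation by calling
// NodePublishVolume against the plugin and returns the host path that should
// be mounted into the allocation's tasks. If the target path is already a
// mount point, the existing publish is reused.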
func (v *volumeManager) publishVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (*MountInfo, error) {
	logger := hclog.FromContext(ctx)
	var pluginStagingPath string
	if v.requiresStaging {
		pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
	}

	hostTargetPath, isMount, err := v.ensureAllocDir(vol, alloc, usage)
	if err != nil {
		return nil, err
	}
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage)

	if isMount {
		logger.Debug("Re-using existing published volume for allocation")
		return &MountInfo{Source: hostTargetPath}, nil
	}

	capabilities, err := volumeCapability(vol, usage)
	if err != nil {
		return nil, err
	}

	// CSI NodePublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	err = v.plugin.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		TargetPath:        pluginTargetPath,
		VolumeCapability:  capabilities,
		Readonly:          usage.ReadOnly,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	},
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	return &MountInfo{Source: hostTargetPath}, err
}

// MountVolume performs the steps required for using a given volume
// configuration for the provided allocation.
// It is passed the publishContext from the remote attachment and the specific
// usage mode from the CSI hook, and uses this state to stage and publish the
// volume as required for use by the given allocation.
func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (mountInfo *MountInfo, err error) {
	logger := v.logger.With("volume_id", vol.ID, "alloc_id", alloc.ID)
	ctx = hclog.WithContext(ctx, logger)

	if v.requiresStaging {
		err = v.stageVolume(ctx, vol, usage, publishContext)
	}

	if err == nil {
		mountInfo, err = v.publishVolume(ctx, vol, alloc, usage, publishContext)
	}

	if err == nil {
		v.usageTracker.Claim(alloc.ID, vol.ID, usage)
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Mount volume").
		AddDetail("volume_id", vol.ID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return mountInfo, err
}

// unstageVolume is the inverse operation of `stageVolume` and must be called
// once for each staging path that a volume has been staged under.
// It is safe to call multiple times and a plugin is required to return OK if
// the volume has been unstaged or was never staged on the node.
func (v *volumeManager) unstageVolume(ctx context.Context, volID, remoteID string, usage *UsageOptions) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Unstaging volume")
	stagingPath := v.stagingDirForVolume(v.containerMountPoint, volID, usage)

	// CSI NodeUnstageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeUnstageVolume(ctx,
		remoteID,
		stagingPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

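// combineErrors collects the non-nil errors from maybeErrs into a single
// multierror, returning nil if there were none.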
func combineErrors(maybeErrs ...error) error {
	var result *multierror.Error
	for _, err := range maybeErrs {
		if err == nil {
			continue
		}

		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}

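// unpublishVolume is the inverse operation of `publishVolume`. It calls
// NodeUnpublishVolume for the allocation's target path and then removes the
// corresponding host directory, wrapping RPC errors that are safe to ignore
// (e.g. the mount was already cleaned up) with ErrCSIClientRPCIgnorable.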
func (v *volumeManager) unpublishVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) error {
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, volID, allocID, usage)

	// CSI NodeUnpublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	rpcErr := v.plugin.NodeUnpublishVolume(ctx, remoteID, pluginTargetPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	hostTargetPath := v.targetForVolume(v.mountRoot, volID, allocID, usage)
	if _, err := os.Stat(hostTargetPath); os.IsNotExist(err) {
		if rpcErr != nil && strings.Contains(rpcErr.Error(), "no mount point") {
			// The host target path was already destroyed, so there is nothing
			// to do here. This helps in the case that a previous GC attempt
			// cleaned up the volume on the node but the controller RPCs failed.
			rpcErr = fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
		}
		return rpcErr
	}

	// The host target path was not cleaned up, so attempt to do so here. If it
	// is still a mount point, removing the directory will fail and we return
	// both the rpcErr and the file error.
	rmErr := os.Remove(hostTargetPath)
	if rmErr != nil {
		return combineErrors(rpcErr, rmErr)
	}

	// We successfully removed the directory, so return any RPC error that was
	// encountered; because we got here, it was probably transient or the mount
	// was already cleaned up externally.
	return fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
}

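// UnmountVolume unpublishes the volume for the given allocation and, if the
// usage tracker reports that the volume can be released and the plugin
// requires staging, unstages it as well. A node event is emitted with the
// result; errors wrapped with ErrCSIClientRPCIgnorable are treated as success.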
func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) (err error) {
	logger := v.logger.With("volume_id", volID, "alloc_id", allocID)
	ctx = hclog.WithContext(ctx, logger)

	err = v.unpublishVolume(ctx, volID, remoteID, allocID, usage)

	if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		canRelease := v.usageTracker.Free(allocID, volID, usage)
		if v.requiresStaging && canRelease {
			err = v.unstageVolume(ctx, volID, remoteID, usage)
		}
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Unmount volume").
		AddDetail("volume_id", volID)
	if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return err
}