github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/pluginmanager/csimanager/volume.go

package csimanager

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/mount"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
)

var _ VolumeMounter = &volumeManager{}

const (
	DefaultMountActionTimeout = 2 * time.Minute
	StagingDirName            = "staging"
	AllocSpecificDirName      = "per-alloc"
)

// volumeManager handles the state of attached volumes for a given CSI Plugin.
//
// volumeManagers outlive the lifetime of a given allocation as volumes may be
// shared by multiple allocations on the same node.
//
// volumes are keyed by an enriched volume usage struct, as the CSI spec
// requires slightly different handling depending on the usage mode.
type volumeManager struct {
	logger  hclog.Logger
	eventer TriggerNodeEvent
	plugin  csi.CSIPlugin

	usageTracker *volumeUsageTracker

	// mountRoot is the root under which plugin directories and mounts may be
	// created, e.g. /opt/nomad.d/statedir/csi/my-csi-plugin/
	mountRoot string

	// containerMountPoint is the location _inside_ the plugin container that
	// the `mountRoot` is bound into.
	containerMountPoint string

	// requiresStaging shows whether the plugin requires that the volume manager
	// calls NodeStageVolume and NodeUnstageVolume RPCs during setup and teardown
	requiresStaging bool
}

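// newVolumeManager returns a volumeManager for the given plugin. rootDir is
// the host directory under which staging and per-alloc mount directories are
// created, and containerRootDir is the path at which that same directory is
// visible from inside the plugin container. A minimal, purely illustrative
// sketch (the argument values below are hypothetical):
//
//	vm := newVolumeManager(logger, eventer, plugin,
//		"/opt/nomad/client/csi/my-plugin", "/local/csi", true)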
func newVolumeManager(logger hclog.Logger, eventer TriggerNodeEvent, plugin csi.CSIPlugin, rootDir, containerRootDir string, requiresStaging bool) *volumeManager {
	return &volumeManager{
		logger:              logger.Named("volume_manager"),
		eventer:             eventer,
		plugin:              plugin,
		mountRoot:           rootDir,
		containerMountPoint: containerRootDir,
		requiresStaging:     requiresStaging,
		usageTracker:        newVolumeUsageTracker(),
	}
}

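// stagingDirForVolume returns the staging directory for the given volume and
// usage mode under root, which is either the host mountRoot or the
// in-container containerMountPoint.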
func (v *volumeManager) stagingDirForVolume(root string, volID string, usage *UsageOptions) string {
	return filepath.Join(root, StagingDirName, volID, usage.ToFS())
}

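// allocDirForVolume returns the per-allocation publish directory for the given
// volume and usage mode under root, which is either the host mountRoot or the
// in-container containerMountPoint.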
func (v *volumeManager) allocDirForVolume(root string, volID, allocID string, usage *UsageOptions) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID, usage.ToFS())
}

// ensureStagingDir attempts to create a directory for use when staging a volume
// and then checks whether the path is already a mount point, e.g. for an
// existing volume stage.
//
// Returns the staging path, whether it is a pre-existing mount point, and any
// error that occurred.
func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOptions) (string, bool, error) {
	stagingPath := v.stagingDirForVolume(v.mountRoot, vol.ID, usage)

	// Make the staging path, owned by the Nomad user
	if err := os.MkdirAll(stagingPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create staging directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(stagingPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return stagingPath, !isNotMount, nil
}

// ensureAllocDir attempts to create a directory for use when publishing a volume
// and then checks whether the path is already a mount point (e.g. when
// reattaching to existing allocs).
//
// Returns the publish path, whether it is a pre-existing mount point, and any
// error that occurred.
func (v *volumeManager) ensureAllocDir(vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (string, bool, error) {
	allocPath := v.allocDirForVolume(v.mountRoot, vol.ID, alloc.ID, usage)

	// Make the alloc path, owned by the Nomad user
	if err := os.MkdirAll(allocPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create allocation directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(allocPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return allocPath, !isNotMount, nil
}

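// volumeCapability builds the CSI VolumeCapability for the volume's attachment
// and access modes, merging any usage-specific mount options over the volume's
// own mount options.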
func volumeCapability(vol *structs.CSIVolume, usage *UsageOptions) (*csi.VolumeCapability, error) {
	capability, err := csi.VolumeCapabilityFromStructs(vol.AttachmentMode, vol.AccessMode)
	if err != nil {
		return nil, err
	}

	var opts *structs.CSIMountOptions
	if vol.MountOptions == nil {
		opts = usage.MountOptions
	} else {
		opts = vol.MountOptions.Copy()
		opts.Merge(usage.MountOptions)
	}

	capability.MountVolume = opts

	return capability, nil
}

// stageVolume prepares a volume for use by allocations. When a plugin exposes
// the STAGE_UNSTAGE_VOLUME capability it MUST be called once per volume for a
// given usage mode before the volume can be NodePublish-ed.
func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume, usage *UsageOptions, publishContext map[string]string) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Preparing volume staging environment")
	hostStagingPath, isMount, err := v.ensureStagingDir(vol, usage)
	if err != nil {
		return err
	}
	pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)

	logger.Trace("Volume staging environment", "pre-existing_mount", isMount, "host_staging_path", hostStagingPath, "plugin_staging_path", pluginStagingPath)

	if isMount {
		logger.Debug("re-using existing staging mount for volume", "staging_path", hostStagingPath)
		return nil
	}

	capability, err := volumeCapability(vol, usage)
	if err != nil {
		return err
	}

	// CSI NodeStageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeStageVolume(ctx,
		vol.RemoteID(),
		publishContext,
		pluginStagingPath,
		capability,
		vol.Secrets,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

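// publishVolume calls NodePublishVolume to mount the (optionally staged)
// volume at an allocation-specific target path and returns the host path for
// that target. If the target path is already a mount point, the existing
// publish is re-used.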
func (v *volumeManager) publishVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (*MountInfo, error) {
	logger := hclog.FromContext(ctx)
	var pluginStagingPath string
	if v.requiresStaging {
		pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
	}

	hostTargetPath, isMount, err := v.ensureAllocDir(vol, alloc, usage)
	if err != nil {
		return nil, err
	}
	pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage)

	if isMount {
		logger.Debug("Re-using existing published volume for allocation")
		return &MountInfo{Source: hostTargetPath}, nil
	}

	capabilities, err := volumeCapability(vol, usage)
	if err != nil {
		return nil, err
	}

	// CSI NodePublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	err = v.plugin.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		TargetPath:        pluginTargetPath,
		VolumeCapability:  capabilities,
		Readonly:          usage.ReadOnly,
		Secrets:           vol.Secrets,
	},
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	return &MountInfo{Source: hostTargetPath}, err
}

// MountVolume performs the steps required for using a given volume
// configuration for the provided allocation.
// It is passed the publishContext from remote attachment, and specific usage
// modes from the CSI Hook.
// It then uses this state to stage and publish the volume as required for use
// by the given allocation.
func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (mountInfo *MountInfo, err error) {
	logger := v.logger.With("volume_id", vol.ID, "alloc_id", alloc.ID)
	ctx = hclog.WithContext(ctx, logger)

	if v.requiresStaging {
		err = v.stageVolume(ctx, vol, usage, publishContext)
	}

	if err == nil {
		mountInfo, err = v.publishVolume(ctx, vol, alloc, usage, publishContext)
	}

	if err == nil {
		v.usageTracker.Claim(alloc.ID, vol.ID, usage)
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Mount volume").
		AddDetail("volume_id", vol.ID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return mountInfo, err
}

// unstageVolume is the inverse operation of `stageVolume` and must be called
// once for each staging path that a volume has been staged under.
// It is safe to call multiple times and a plugin is required to return OK if
// the volume has been unstaged or was never staged on the node.
func (v *volumeManager) unstageVolume(ctx context.Context, volID, remoteID string, usage *UsageOptions) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Unstaging volume")
	stagingPath := v.stagingDirForVolume(v.containerMountPoint, volID, usage)

	// CSI NodeUnstageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeUnstageVolume(ctx,
		remoteID,
		stagingPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

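// combineErrors collects any non-nil errors into a single multierror,
// returning nil when there are none.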
func combineErrors(maybeErrs ...error) error {
	var result *multierror.Error
	for _, err := range maybeErrs {
		if err == nil {
			continue
		}

		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}

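// unpublishVolume is the inverse operation of publishVolume: it calls
// NodeUnpublishVolume for the allocation's target path and then removes the
// host-side target directory if the plugin has not already cleaned it up.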
func (v *volumeManager) unpublishVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) error {
	pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, volID, allocID, usage)

	// CSI NodeUnpublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	rpcErr := v.plugin.NodeUnpublishVolume(ctx, remoteID, pluginTargetPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	hostTargetPath := v.allocDirForVolume(v.mountRoot, volID, allocID, usage)
	if _, err := os.Stat(hostTargetPath); os.IsNotExist(err) {
		if rpcErr != nil && strings.Contains(rpcErr.Error(), "no mount point") {
			// The host target path was already destroyed, so there is nothing
			// to do here. This helps us in the case that a previous GC attempt
			// cleaned up the volume on the node but the controller RPCs failed.
			return nil
		}
		return rpcErr
	}

	// The host target path was not cleaned up, so attempt to do so here. If it
	// is still a mount then removing the dir will fail and we'll return both
	// any rpcErr and the file error.
	rmErr := os.Remove(hostTargetPath)
	if rmErr != nil {
		return combineErrors(rpcErr, rmErr)
	}

	// We successfully removed the directory, so return any rpcErr that was
	// encountered; because we got here, it was probably flaky or the mount was
	// cleaned up externally. We might want to just return `nil` here in the
	// future.
	return rpcErr
}

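// UnmountVolume unpublishes the volume for the given allocation and, when the
// plugin requires staging and the usage tracker reports no remaining claims
// for this usage mode, unstages it. The outcome is reported as a node event.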
func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) (err error) {
	logger := v.logger.With("volume_id", volID, "alloc_id", allocID)
	ctx = hclog.WithContext(ctx, logger)

	err = v.unpublishVolume(ctx, volID, remoteID, allocID, usage)

	if err == nil {
		canRelease := v.usageTracker.Free(allocID, volID, usage)
		if v.requiresStaging && canRelease {
			err = v.unstageVolume(ctx, volID, remoteID, usage)
		}
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Unmount volume").
		AddDetail("volume_id", volID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return err
}