github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/csi_endpoint.go (about)

     1  package client
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	metrics "github.com/armon/go-metrics"
    10  	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
    11  	"github.com/hashicorp/nomad/client/dynamicplugins"
    12  	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
    13  	"github.com/hashicorp/nomad/client/structs"
    14  	nstructs "github.com/hashicorp/nomad/nomad/structs"
    15  	"github.com/hashicorp/nomad/plugins/csi"
    16  )
    17  
    18  // CSI endpoint is used for interacting with CSI plugins on a client.
    19  // TODO: Submit metrics with labels to allow debugging per plugin perf problems.
    20  type CSI struct {
    21  	c *Client
    22  }
    23  
    24  const (
    25  	// CSIPluginRequestTimeout is the timeout that should be used when making reqs
    26  	// against CSI Plugins. It is copied from Kubernetes as an initial seed value.
    27  	// https://github.com/kubernetes/kubernetes/blob/e680ad7156f263a6d8129cc0117fda58602e50ad/pkg/volume/csi/csi_plugin.go#L52
    28  	CSIPluginRequestTimeout = 2 * time.Minute
    29  )
    30  
    31  var (
    32  	ErrPluginTypeError = errors.New("CSI Plugin loaded incorrectly")
    33  )
    34  
    35  // ControllerValidateVolume is used during volume registration to validate
    36  // that a volume exists and that the capabilities it was registered with are
    37  // supported by the CSI Plugin and external volume configuration.
    38  func (c *CSI) ControllerValidateVolume(req *structs.ClientCSIControllerValidateVolumeRequest, resp *structs.ClientCSIControllerValidateVolumeResponse) error {
    39  	defer metrics.MeasureSince([]string{"client", "csi_controller", "validate_volume"}, time.Now())
    40  
    41  	if req.VolumeID == "" {
    42  		return errors.New("VolumeID is required")
    43  	}
    44  
    45  	if req.PluginID == "" {
    46  		return errors.New("PluginID is required")
    47  	}
    48  
    49  	plugin, err := c.findControllerPlugin(req.PluginID)
    50  	if err != nil {
    51  		// the server's view of the plugin health is stale, so let it know it
    52  		// should retry with another controller instance
    53  		return fmt.Errorf("%w: %v", nstructs.ErrCSIClientRPCRetryable, err)
    54  	}
    55  	defer plugin.Close()
    56  
    57  	csiReq, err := req.ToCSIRequest()
    58  	if err != nil {
    59  		return err
    60  	}
    61  
    62  	ctx, cancelFn := c.requestContext()
    63  	defer cancelFn()
    64  
    65  	// CSI ValidateVolumeCapabilities errors for timeout, codes.Unavailable and
    66  	// codes.ResourceExhausted are retried; all other errors are fatal.
    67  	return plugin.ControllerValidateCapabilities(ctx, csiReq,
    68  		grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout),
    69  		grpc_retry.WithMax(3),
    70  		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)))
    71  }
    72  
    73  // ControllerAttachVolume is used to attach a volume from a CSI Cluster to
    74  // the storage node provided in the request.
    75  //
    76  // The controller attachment flow currently works as follows:
    77  // 1. Validate the volume request
    78  // 2. Call ControllerPublishVolume on the CSI Plugin to trigger a remote attachment
    79  //
    80  // In the future this may be expanded to request dynamic secrets for attachment.
    81  func (c *CSI) ControllerAttachVolume(req *structs.ClientCSIControllerAttachVolumeRequest, resp *structs.ClientCSIControllerAttachVolumeResponse) error {
    82  	defer metrics.MeasureSince([]string{"client", "csi_controller", "publish_volume"}, time.Now())
    83  	plugin, err := c.findControllerPlugin(req.PluginID)
    84  	if err != nil {
    85  		// the server's view of the plugin health is stale, so let it know it
    86  		// should retry with another controller instance
    87  		return fmt.Errorf("%w: %v", nstructs.ErrCSIClientRPCRetryable, err)
    88  	}
    89  	defer plugin.Close()
    90  
    91  	// The following block of validation checks should not be reached on a
    92  	// real Nomad cluster as all of this data should be validated when registering
    93  	// volumes with the cluster. They serve as a defensive check before forwarding
    94  	// requests to plugins, and to aid with development.
    95  
    96  	if req.VolumeID == "" {
    97  		return errors.New("VolumeID is required")
    98  	}
    99  
   100  	if req.ClientCSINodeID == "" {
   101  		return errors.New("ClientCSINodeID is required")
   102  	}
   103  
   104  	csiReq, err := req.ToCSIRequest()
   105  	if err != nil {
   106  		return err
   107  	}
   108  
   109  	// Submit the request for a volume to the CSI Plugin.
   110  	ctx, cancelFn := c.requestContext()
   111  	defer cancelFn()
   112  	// CSI ControllerPublishVolume errors for timeout, codes.Unavailable and
   113  	// codes.ResourceExhausted are retried; all other errors are fatal.
   114  	cresp, err := plugin.ControllerPublishVolume(ctx, csiReq,
   115  		grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout),
   116  		grpc_retry.WithMax(3),
   117  		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)))
   118  	if err != nil {
   119  		return err
   120  	}
   121  
   122  	resp.PublishContext = cresp.PublishContext
   123  	return nil
   124  }
   125  
   126  // ControllerDetachVolume is used to detach a volume from a CSI Cluster from
   127  // the storage node provided in the request.
   128  func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolumeRequest, resp *structs.ClientCSIControllerDetachVolumeResponse) error {
   129  	defer metrics.MeasureSince([]string{"client", "csi_controller", "unpublish_volume"}, time.Now())
   130  	plugin, err := c.findControllerPlugin(req.PluginID)
   131  	if err != nil {
   132  		// the server's view of the plugin health is stale, so let it know it
   133  		// should retry with another controller instance
   134  		return fmt.Errorf("%w: %v", nstructs.ErrCSIClientRPCRetryable, err)
   135  	}
   136  	defer plugin.Close()
   137  
   138  	// The following block of validation checks should not be reached on a
   139  	// real Nomad cluster as all of this data should be validated when registering
   140  	// volumes with the cluster. They serve as a defensive check before forwarding
   141  	// requests to plugins, and to aid with development.
   142  
   143  	if req.VolumeID == "" {
   144  		return errors.New("VolumeID is required")
   145  	}
   146  
   147  	if req.ClientCSINodeID == "" {
   148  		return errors.New("ClientCSINodeID is required")
   149  	}
   150  
   151  	csiReq := req.ToCSIRequest()
   152  
   153  	// Submit the request for a volume to the CSI Plugin.
   154  	ctx, cancelFn := c.requestContext()
   155  	defer cancelFn()
   156  	// CSI ControllerUnpublishVolume errors for timeout, codes.Unavailable and
   157  	// codes.ResourceExhausted are retried; all other errors are fatal.
   158  	_, err = plugin.ControllerUnpublishVolume(ctx, csiReq,
   159  		grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout),
   160  		grpc_retry.WithMax(3),
   161  		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)))
   162  	if err != nil {
   163  		if errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) {
   164  			// if the controller detach previously happened but the server failed to
   165  			// checkpoint, we'll get an error from the plugin but can safely ignore it.
   166  			c.c.logger.Debug("could not unpublish volume: %v", err)
   167  			return nil
   168  		}
   169  		return err
   170  	}
   171  	return nil
   172  }
   173  
   174  // NodeDetachVolume is used to detach a volume from a CSI Cluster from
   175  // the storage node provided in the request.
   176  func (c *CSI) NodeDetachVolume(req *structs.ClientCSINodeDetachVolumeRequest, resp *structs.ClientCSINodeDetachVolumeResponse) error {
   177  	defer metrics.MeasureSince([]string{"client", "csi_node", "detach_volume"}, time.Now())
   178  
   179  	// The following block of validation checks should not be reached on a
   180  	// real Nomad cluster. They serve as a defensive check before forwarding
   181  	// requests to plugins, and to aid with development.
   182  	if req.PluginID == "" {
   183  		return errors.New("PluginID is required")
   184  	}
   185  	if req.VolumeID == "" {
   186  		return errors.New("VolumeID is required")
   187  	}
   188  	if req.AllocID == "" {
   189  		return errors.New("AllocID is required")
   190  	}
   191  
   192  	ctx, cancelFn := c.requestContext()
   193  	defer cancelFn()
   194  
   195  	mounter, err := c.c.csimanager.MounterForPlugin(ctx, req.PluginID)
   196  	if err != nil {
   197  		return err
   198  	}
   199  
   200  	usageOpts := &csimanager.UsageOptions{
   201  		ReadOnly:       req.ReadOnly,
   202  		AttachmentMode: string(req.AttachmentMode),
   203  		AccessMode:     string(req.AccessMode),
   204  	}
   205  
   206  	err = mounter.UnmountVolume(ctx, req.VolumeID, req.ExternalID, req.AllocID, usageOpts)
   207  	if err != nil && !errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) {
   208  		// if the unmounting previously happened but the server failed to
   209  		// checkpoint, we'll get an error from Unmount but can safely
   210  		// ignore it.
   211  		return err
   212  	}
   213  	return nil
   214  }
   215  
   216  func (c *CSI) findControllerPlugin(name string) (csi.CSIPlugin, error) {
   217  	return c.findPlugin(dynamicplugins.PluginTypeCSIController, name)
   218  }
   219  
   220  func (c *CSI) findPlugin(ptype, name string) (csi.CSIPlugin, error) {
   221  	pIface, err := c.c.dynamicRegistry.DispensePlugin(ptype, name)
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  
   226  	plugin, ok := pIface.(csi.CSIPlugin)
   227  	if !ok {
   228  		return nil, ErrPluginTypeError
   229  	}
   230  
   231  	return plugin, nil
   232  }
   233  
   234  func (c *CSI) requestContext() (context.Context, context.CancelFunc) {
   235  	return context.WithTimeout(context.Background(), CSIPluginRequestTimeout)
   236  }