github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/plugins/csi/client.go (about)

     1  package csi
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math"
     7  	"net"
     8  	"os"
     9  	"time"
    10  
    11  	csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
    12  	"github.com/hashicorp/go-hclog"
    13  	multierror "github.com/hashicorp/go-multierror"
    14  	"github.com/hashicorp/nomad/helper/grpc-middleware/logging"
    15  	"github.com/hashicorp/nomad/nomad/structs"
    16  	"github.com/hashicorp/nomad/plugins/base"
    17  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    18  	"golang.org/x/exp/maps"
    19  	"google.golang.org/grpc"
    20  	"google.golang.org/grpc/codes"
    21  	"google.golang.org/grpc/status"
    22  )
    23  
// PluginTypeCSI is the plugin type string reported for CSI plugins (see
// PluginInfo). Note that this is not a Nomad go-plugin type.
const PluginTypeCSI = "csi"
    26  
// NodeGetInfoResponse is the result returned by client.NodeGetInfo, built
// from the plugin's NodeGetInfo RPC response.
type NodeGetInfoResponse struct {
	// NodeID is the node identifier reported by the plugin. NodeGetInfo
	// returns an error instead of a response when the plugin leaves it empty.
	NodeID             string
	// MaxVolumes is the plugin's per-node volume limit. NodeGetInfo replaces
	// a reported value of 0 with math.MaxInt64.
	MaxVolumes         int64
	// AccessibleTopology is nil when the plugin reports no topology.
	AccessibleTopology *Topology
}
    32  
    33  // Topology is a map of topological domains to topological segments.
    34  // A topological domain is a sub-division of a cluster, like "region",
    35  // "zone", "rack", etc.
    36  //
    37  // According to CSI, there are a few requirements for the keys within this map:
    38  //   - Valid keys have two segments: an OPTIONAL prefix and name, separated
    39  //     by a slash (/), for example: "com.company.example/zone".
    40  //   - The key name segment is REQUIRED. The prefix is OPTIONAL.
    41  //   - The key name MUST be 63 characters or less, begin and end with an
    42  //     alphanumeric character ([a-z0-9A-Z]), and contain only dashes (-),
    43  //     underscores (_), dots (.), or alphanumerics in between, for example
    44  //     "zone".
    45  //   - The key prefix MUST be 63 characters or less, begin and end with a
    46  //     lower-case alphanumeric character ([a-z0-9]), contain only
    47  //     dashes (-), dots (.), or lower-case alphanumerics in between, and
    48  //     follow domain name notation format
    49  //     (https://tools.ietf.org/html/rfc1035#section-2.3.1).
    50  //   - The key prefix SHOULD include the plugin's host company name and/or
    51  //     the plugin name, to minimize the possibility of collisions with keys
    52  //     from other plugins.
    53  //   - If a key prefix is specified, it MUST be identical across all
    54  //     topology keys returned by the SP (across all RPCs).
    55  //   - Keys MUST be case-insensitive. Meaning the keys "Zone" and "zone"
    56  //     MUST not both exist.
    57  //   - Each value (topological segment) MUST contain 1 or more strings.
    58  //   - Each string MUST be 63 characters or less and begin and end with an
    59  //     alphanumeric character with '-', '_', '.', or alphanumerics in
    60  //     between.
type Topology struct {
	// Segments maps a topological domain key (for example "zone") to its
	// topological segment value.
	Segments map[string]string
}
    64  
// CSIControllerClient defines the minimal CSI Controller Plugin interface used
// by nomad to simplify the interface required for testing. The value returned
// by csipbv1.NewControllerClient is stored behind this interface once the
// client connects. Each method mirrors the corresponding CSI controller RPC.
type CSIControllerClient interface {
	ControllerGetCapabilities(ctx context.Context, in *csipbv1.ControllerGetCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.ControllerGetCapabilitiesResponse, error)
	ControllerPublishVolume(ctx context.Context, in *csipbv1.ControllerPublishVolumeRequest, opts ...grpc.CallOption) (*csipbv1.ControllerPublishVolumeResponse, error)
	ControllerUnpublishVolume(ctx context.Context, in *csipbv1.ControllerUnpublishVolumeRequest, opts ...grpc.CallOption) (*csipbv1.ControllerUnpublishVolumeResponse, error)
	ValidateVolumeCapabilities(ctx context.Context, in *csipbv1.ValidateVolumeCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.ValidateVolumeCapabilitiesResponse, error)
	CreateVolume(ctx context.Context, in *csipbv1.CreateVolumeRequest, opts ...grpc.CallOption) (*csipbv1.CreateVolumeResponse, error)
	ListVolumes(ctx context.Context, in *csipbv1.ListVolumesRequest, opts ...grpc.CallOption) (*csipbv1.ListVolumesResponse, error)
	DeleteVolume(ctx context.Context, in *csipbv1.DeleteVolumeRequest, opts ...grpc.CallOption) (*csipbv1.DeleteVolumeResponse, error)
	CreateSnapshot(ctx context.Context, in *csipbv1.CreateSnapshotRequest, opts ...grpc.CallOption) (*csipbv1.CreateSnapshotResponse, error)
	DeleteSnapshot(ctx context.Context, in *csipbv1.DeleteSnapshotRequest, opts ...grpc.CallOption) (*csipbv1.DeleteSnapshotResponse, error)
	ListSnapshots(ctx context.Context, in *csipbv1.ListSnapshotsRequest, opts ...grpc.CallOption) (*csipbv1.ListSnapshotsResponse, error)
}
    79  
// CSINodeClient defines the minimal CSI Node Plugin interface used
// by nomad to simplify the interface required for testing. The value returned
// by csipbv1.NewNodeClient is stored behind this interface once the client
// connects. Each method mirrors the corresponding CSI node RPC.
type CSINodeClient interface {
	NodeGetCapabilities(ctx context.Context, in *csipbv1.NodeGetCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.NodeGetCapabilitiesResponse, error)
	NodeGetInfo(ctx context.Context, in *csipbv1.NodeGetInfoRequest, opts ...grpc.CallOption) (*csipbv1.NodeGetInfoResponse, error)
	NodeStageVolume(ctx context.Context, in *csipbv1.NodeStageVolumeRequest, opts ...grpc.CallOption) (*csipbv1.NodeStageVolumeResponse, error)
	NodeUnstageVolume(ctx context.Context, in *csipbv1.NodeUnstageVolumeRequest, opts ...grpc.CallOption) (*csipbv1.NodeUnstageVolumeResponse, error)
	NodePublishVolume(ctx context.Context, in *csipbv1.NodePublishVolumeRequest, opts ...grpc.CallOption) (*csipbv1.NodePublishVolumeResponse, error)
	NodeUnpublishVolume(ctx context.Context, in *csipbv1.NodeUnpublishVolumeRequest, opts ...grpc.CallOption) (*csipbv1.NodeUnpublishVolumeResponse, error)
}
    90  
// client is the gRPC-backed plugin client constructed by NewClient. Only addr
// and logger are set at construction; conn and the three service clients are
// filled in by ensureConnected once the plugin socket can be dialed.
type client struct {
	// addr is the filesystem path of the plugin's unix socket.
	addr             string
	// conn is nil until a connection has been established.
	conn             *grpc.ClientConn
	identityClient   csipbv1.IdentityClient
	controllerClient CSIControllerClient
	nodeClient       CSINodeClient
	// logger is also handed to the gRPC logging interceptors.
	logger           hclog.Logger
}
    99  
   100  func (c *client) Close() error {
   101  	if c.conn != nil {
   102  		return c.conn.Close()
   103  	}
   104  	return nil
   105  }
   106  
   107  func NewClient(addr string, logger hclog.Logger) CSIPlugin {
   108  	return &client{
   109  		addr:   addr,
   110  		logger: logger,
   111  	}
   112  }
   113  
   114  func (c *client) ensureConnected(ctx context.Context) error {
   115  	if c == nil {
   116  		return fmt.Errorf("client not initialized")
   117  	}
   118  	if c.conn != nil {
   119  		return nil
   120  	}
   121  	if c.addr == "" {
   122  		return fmt.Errorf("address is empty")
   123  	}
   124  	var conn *grpc.ClientConn
   125  	var err error
   126  	t := time.NewTimer(0)
   127  	for {
   128  		select {
   129  		case <-ctx.Done():
   130  			return fmt.Errorf("timeout while connecting to gRPC socket: %v", err)
   131  		case <-t.C:
   132  			_, err = os.Stat(c.addr)
   133  			if err != nil {
   134  				err = fmt.Errorf("failed to stat socket: %v", err)
   135  				t.Reset(5 * time.Second)
   136  				continue
   137  			}
   138  			conn, err = newGrpcConn(c.addr, c.logger)
   139  			if err != nil {
   140  				err = fmt.Errorf("failed to create gRPC connection: %v", err)
   141  				t.Reset(time.Second * 5)
   142  				continue
   143  			}
   144  			c.conn = conn
   145  			c.identityClient = csipbv1.NewIdentityClient(conn)
   146  			c.controllerClient = csipbv1.NewControllerClient(conn)
   147  			c.nodeClient = csipbv1.NewNodeClient(conn)
   148  			return nil
   149  		}
   150  	}
   151  }
   152  
   153  func newGrpcConn(addr string, logger hclog.Logger) (*grpc.ClientConn, error) {
   154  	// after DialContext returns w/ initial connection, closing this
   155  	// context is a no-op
   156  	connectCtx, cancel := context.WithTimeout(context.Background(), time.Second*1)
   157  	defer cancel()
   158  	conn, err := grpc.DialContext(
   159  		connectCtx,
   160  		addr,
   161  		grpc.WithBlock(),
   162  		grpc.WithInsecure(),
   163  		grpc.WithUnaryInterceptor(logging.UnaryClientInterceptor(logger)),
   164  		grpc.WithStreamInterceptor(logging.StreamClientInterceptor(logger)),
   165  		grpc.WithAuthority("localhost"),
   166  		grpc.WithDialer(func(target string, timeout time.Duration) (net.Conn, error) {
   167  			return net.DialTimeout("unix", target, timeout)
   168  		}),
   169  	)
   170  
   171  	if err != nil {
   172  		return nil, fmt.Errorf("failed to open grpc connection to addr: %s, err: %v", addr, err)
   173  	}
   174  
   175  	return conn, nil
   176  }
   177  
   178  // PluginInfo describes the type and version of a plugin as required by the nomad
   179  // base.BasePlugin interface.
   180  func (c *client) PluginInfo() (*base.PluginInfoResponse, error) {
   181  	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   182  	defer cancel()
   183  	if err := c.ensureConnected(ctx); err != nil {
   184  		return nil, err
   185  	}
   186  
   187  	// note: no grpc retries needed here, as this is called in
   188  	// fingerprinting and will get retried by the caller.
   189  	name, version, err := c.PluginGetInfo(ctx)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  
   194  	return &base.PluginInfoResponse{
   195  		Type:              PluginTypeCSI,     // note: this isn't a Nomad go-plugin type
   196  		PluginApiVersions: []string{"1.0.0"}, // TODO(tgross): we want to fingerprint spec version, but this isn't included as a field from the plugins
   197  		PluginVersion:     version,
   198  		Name:              name,
   199  	}, nil
   200  }
   201  
   202  // ConfigSchema returns the schema for parsing the plugins configuration as
   203  // required by the base.BasePlugin interface. It will always return nil.
   204  func (c *client) ConfigSchema() (*hclspec.Spec, error) {
   205  	return nil, nil
   206  }
   207  
   208  // SetConfig is used to set the configuration by passing a MessagePack
   209  // encoding of it.
   210  func (c *client) SetConfig(_ *base.Config) error {
   211  	return fmt.Errorf("unsupported")
   212  }
   213  
   214  func (c *client) PluginProbe(ctx context.Context) (bool, error) {
   215  	if err := c.ensureConnected(ctx); err != nil {
   216  		return false, err
   217  	}
   218  
   219  	// note: no grpc retries should be done here
   220  	req, err := c.identityClient.Probe(ctx, &csipbv1.ProbeRequest{})
   221  	if err != nil {
   222  		return false, err
   223  	}
   224  
   225  	wrapper := req.GetReady()
   226  
   227  	// wrapper.GetValue() protects against wrapper being `nil`, and returns false.
   228  	ready := wrapper.GetValue()
   229  
   230  	if wrapper == nil {
   231  		// If the plugin returns a nil value for ready, then it should be
   232  		// interpreted as the plugin is ready for compatibility with plugins that
   233  		// do not do health checks.
   234  		ready = true
   235  	}
   236  
   237  	return ready, nil
   238  }
   239  
   240  func (c *client) PluginGetInfo(ctx context.Context) (string, string, error) {
   241  	if err := c.ensureConnected(ctx); err != nil {
   242  		return "", "", err
   243  	}
   244  
   245  	resp, err := c.identityClient.GetPluginInfo(ctx, &csipbv1.GetPluginInfoRequest{})
   246  	if err != nil {
   247  		return "", "", err
   248  	}
   249  
   250  	name := resp.GetName()
   251  	if name == "" {
   252  		return "", "", fmt.Errorf("PluginGetInfo: plugin returned empty name field")
   253  	}
   254  	version := resp.GetVendorVersion()
   255  
   256  	return name, version, nil
   257  }
   258  
   259  func (c *client) PluginGetCapabilities(ctx context.Context) (*PluginCapabilitySet, error) {
   260  	if err := c.ensureConnected(ctx); err != nil {
   261  		return nil, err
   262  	}
   263  
   264  	// note: no grpc retries needed here, as this is called in
   265  	// fingerprinting and will get retried by the caller
   266  	resp, err := c.identityClient.GetPluginCapabilities(ctx,
   267  		&csipbv1.GetPluginCapabilitiesRequest{})
   268  	if err != nil {
   269  		return nil, err
   270  	}
   271  
   272  	return NewPluginCapabilitySet(resp), nil
   273  }
   274  
   275  //
   276  // Controller Endpoints
   277  //
   278  
   279  func (c *client) ControllerGetCapabilities(ctx context.Context) (*ControllerCapabilitySet, error) {
   280  	if err := c.ensureConnected(ctx); err != nil {
   281  		return nil, err
   282  	}
   283  
   284  	// note: no grpc retries needed here, as this is called in
   285  	// fingerprinting and will get retried by the caller
   286  	resp, err := c.controllerClient.ControllerGetCapabilities(ctx,
   287  		&csipbv1.ControllerGetCapabilitiesRequest{})
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  
   292  	return NewControllerCapabilitySet(resp), nil
   293  }
   294  
   295  func (c *client) ControllerPublishVolume(ctx context.Context, req *ControllerPublishVolumeRequest, opts ...grpc.CallOption) (*ControllerPublishVolumeResponse, error) {
   296  	if err := c.ensureConnected(ctx); err != nil {
   297  		return nil, err
   298  	}
   299  
   300  	err := req.Validate()
   301  	if err != nil {
   302  		return nil, err
   303  	}
   304  
   305  	pbrequest := req.ToCSIRepresentation()
   306  	resp, err := c.controllerClient.ControllerPublishVolume(ctx, pbrequest, opts...)
   307  	if err != nil {
   308  		code := status.Code(err)
   309  		switch code {
   310  		case codes.NotFound:
   311  			err = fmt.Errorf("volume %q or node %q could not be found: %v",
   312  				req.ExternalID, req.NodeID, err)
   313  		case codes.AlreadyExists:
   314  			err = fmt.Errorf(
   315  				"volume %q is already published at node %q but with capabilities or a read_only setting incompatible with this request: %v",
   316  				req.ExternalID, req.NodeID, err)
   317  		case codes.ResourceExhausted:
   318  			err = fmt.Errorf("node %q has reached the maximum allowable number of attached volumes: %v",
   319  				req.NodeID, err)
   320  		case codes.FailedPrecondition:
   321  			err = fmt.Errorf("volume %q is already published on another node and does not have MULTI_NODE volume capability: %v",
   322  				req.ExternalID, err)
   323  		case codes.Internal:
   324  			err = fmt.Errorf("controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   325  		}
   326  		return nil, err
   327  	}
   328  
   329  	return &ControllerPublishVolumeResponse{
   330  		PublishContext: maps.Clone(resp.PublishContext),
   331  	}, nil
   332  }
   333  
   334  func (c *client) ControllerUnpublishVolume(ctx context.Context, req *ControllerUnpublishVolumeRequest, opts ...grpc.CallOption) (*ControllerUnpublishVolumeResponse, error) {
   335  	if err := c.ensureConnected(ctx); err != nil {
   336  		return nil, err
   337  	}
   338  	err := req.Validate()
   339  	if err != nil {
   340  		return nil, err
   341  	}
   342  
   343  	upbrequest := req.ToCSIRepresentation()
   344  	_, err = c.controllerClient.ControllerUnpublishVolume(ctx, upbrequest, opts...)
   345  	if err != nil {
   346  		code := status.Code(err)
   347  		switch code {
   348  		case codes.NotFound:
   349  			// we'll have validated the volume and node *should* exist at the
   350  			// server, so if we get a not-found here it's because we've previously
   351  			// checkpointed. we'll return an error so the caller can log it for
   352  			// diagnostic purposes.
   353  			err = fmt.Errorf("%w: volume %q or node %q could not be found: %v",
   354  				structs.ErrCSIClientRPCIgnorable, req.ExternalID, req.NodeID, err)
   355  		case codes.Internal:
   356  			err = fmt.Errorf("controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   357  		}
   358  		return nil, err
   359  	}
   360  
   361  	return &ControllerUnpublishVolumeResponse{}, nil
   362  }
   363  
   364  func (c *client) ControllerValidateCapabilities(ctx context.Context, req *ControllerValidateVolumeRequest, opts ...grpc.CallOption) error {
   365  	if err := c.ensureConnected(ctx); err != nil {
   366  		return err
   367  	}
   368  	if req.ExternalID == "" {
   369  		return fmt.Errorf("missing volume ID")
   370  	}
   371  
   372  	if req.Capabilities == nil {
   373  		return fmt.Errorf("missing Capabilities")
   374  	}
   375  
   376  	creq := req.ToCSIRepresentation()
   377  	resp, err := c.controllerClient.ValidateVolumeCapabilities(ctx, creq, opts...)
   378  	if err != nil {
   379  		code := status.Code(err)
   380  		switch code {
   381  		case codes.NotFound:
   382  			err = fmt.Errorf("volume %q could not be found: %v", req.ExternalID, err)
   383  		case codes.Internal:
   384  			err = fmt.Errorf("controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   385  		}
   386  		return err
   387  	}
   388  
   389  	if resp.Message != "" {
   390  		// this should only ever be set if Confirmed isn't set, but
   391  		// it's not a validation failure.
   392  		c.logger.Debug(resp.Message)
   393  	}
   394  
   395  	// The protobuf accessors below safely handle nil pointers.
   396  	// The CSI spec says we can only assert the plugin has
   397  	// confirmed the volume capabilities, not that it hasn't
   398  	// confirmed them, so if the field is nil we have to assume
   399  	// the volume is ok.
   400  	confirmedCaps := resp.GetConfirmed().GetVolumeCapabilities()
   401  	if confirmedCaps != nil {
   402  		for _, requestedCap := range creq.VolumeCapabilities {
   403  			err := compareCapabilities(requestedCap, confirmedCaps)
   404  			if err != nil {
   405  				return fmt.Errorf("volume capability validation failed: %v", err)
   406  			}
   407  		}
   408  	}
   409  
   410  	return nil
   411  }
   412  
   413  func (c *client) ControllerCreateVolume(ctx context.Context, req *ControllerCreateVolumeRequest, opts ...grpc.CallOption) (*ControllerCreateVolumeResponse, error) {
   414  	if err := c.ensureConnected(ctx); err != nil {
   415  		return nil, err
   416  	}
   417  
   418  	err := req.Validate()
   419  	if err != nil {
   420  		return nil, err
   421  	}
   422  	creq := req.ToCSIRepresentation()
   423  	resp, err := c.controllerClient.CreateVolume(ctx, creq, opts...)
   424  
   425  	// these standard gRPC error codes are overloaded with CSI-specific
   426  	// meanings, so translate them into user-understandable terms
   427  	// https://github.com/container-storage-interface/spec/blob/master/spec.md#createvolume-errors
   428  	if err != nil {
   429  		code := status.Code(err)
   430  		switch code {
   431  		case codes.InvalidArgument:
   432  			return nil, fmt.Errorf(
   433  				"volume %q snapshot source %q is not compatible with these parameters: %v",
   434  				req.Name, req.ContentSource, err)
   435  		case codes.NotFound:
   436  			return nil, fmt.Errorf(
   437  				"volume %q content source %q does not exist: %v",
   438  				req.Name, req.ContentSource, err)
   439  		case codes.AlreadyExists:
   440  			return nil, fmt.Errorf(
   441  				"volume %q already exists but is incompatible with these parameters: %v",
   442  				req.Name, err)
   443  		case codes.ResourceExhausted:
   444  			return nil, fmt.Errorf(
   445  				"unable to provision %q in accessible_topology: %v",
   446  				req.Name, err)
   447  		case codes.OutOfRange:
   448  			return nil, fmt.Errorf(
   449  				"unsupported capacity_range for volume %q: %v", req.Name, err)
   450  		case codes.Internal:
   451  			return nil, fmt.Errorf(
   452  				"controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   453  		}
   454  		return nil, err
   455  	}
   456  
   457  	return NewCreateVolumeResponse(resp), nil
   458  }
   459  
   460  func (c *client) ControllerListVolumes(ctx context.Context, req *ControllerListVolumesRequest, opts ...grpc.CallOption) (*ControllerListVolumesResponse, error) {
   461  	if err := c.ensureConnected(ctx); err != nil {
   462  		return nil, err
   463  	}
   464  
   465  	err := req.Validate()
   466  	if err != nil {
   467  		return nil, err
   468  	}
   469  	creq := req.ToCSIRepresentation()
   470  	resp, err := c.controllerClient.ListVolumes(ctx, creq, opts...)
   471  	if err != nil {
   472  		code := status.Code(err)
   473  		switch code {
   474  		case codes.Aborted:
   475  			return nil, fmt.Errorf(
   476  				"invalid starting token %q: %v", req.StartingToken, err)
   477  		case codes.Internal:
   478  			return nil, fmt.Errorf(
   479  				"controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   480  		}
   481  		return nil, err
   482  	}
   483  	return NewListVolumesResponse(resp), nil
   484  }
   485  
   486  func (c *client) ControllerDeleteVolume(ctx context.Context, req *ControllerDeleteVolumeRequest, opts ...grpc.CallOption) error {
   487  	if err := c.ensureConnected(ctx); err != nil {
   488  		return err
   489  	}
   490  
   491  	err := req.Validate()
   492  	if err != nil {
   493  		return err
   494  	}
   495  	creq := req.ToCSIRepresentation()
   496  	_, err = c.controllerClient.DeleteVolume(ctx, creq, opts...)
   497  	if err != nil {
   498  		code := status.Code(err)
   499  		switch code {
   500  		case codes.FailedPrecondition:
   501  			return fmt.Errorf("volume %q is in use: %v", req.ExternalVolumeID, err)
   502  		case codes.Internal:
   503  			return fmt.Errorf(
   504  				"controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   505  		}
   506  	}
   507  	return err
   508  }
   509  
   510  // compareCapabilities returns an error if the 'got' capabilities aren't found
   511  // within the 'expected' capability.
   512  //
   513  // Note that plugins in the wild are known to return incomplete
   514  // VolumeCapability responses, so we can't require that all capabilities we
   515  // expect have been validated, only that the ones that have been validated
   516  // match. This appears to violate the CSI specification but until that's been
   517  // resolved in upstream we have to loosen our validation requirements. The
   518  // tradeoff is that we're more likely to have runtime errors during
   519  // NodeStageVolume.
   520  func compareCapabilities(expected *csipbv1.VolumeCapability, got []*csipbv1.VolumeCapability) error {
   521  	var err multierror.Error
   522  NEXT_CAP:
   523  	for _, cap := range got {
   524  
   525  		expectedMode := expected.GetAccessMode().GetMode()
   526  		capMode := cap.GetAccessMode().GetMode()
   527  
   528  		// The plugin may not validate AccessMode, in which case we'll
   529  		// get UNKNOWN as our response
   530  		if capMode != csipbv1.VolumeCapability_AccessMode_UNKNOWN {
   531  			if expectedMode != capMode {
   532  				multierror.Append(&err,
   533  					fmt.Errorf("requested access mode %v, got %v", expectedMode, capMode))
   534  				continue NEXT_CAP
   535  			}
   536  		}
   537  
   538  		capBlock := cap.GetBlock()
   539  		capMount := cap.GetMount()
   540  		expectedBlock := expected.GetBlock()
   541  		expectedMount := expected.GetMount()
   542  
   543  		if capBlock != nil && expectedBlock == nil {
   544  			multierror.Append(&err, fmt.Errorf(
   545  				"'block-device' access type was not requested but was validated by the controller"))
   546  			continue NEXT_CAP
   547  		}
   548  
   549  		if capMount == nil {
   550  			continue NEXT_CAP
   551  		}
   552  
   553  		if expectedMount == nil {
   554  			multierror.Append(&err, fmt.Errorf(
   555  				"'file-system' access type was not requested but was validated by the controller"))
   556  			continue NEXT_CAP
   557  		}
   558  
   559  		if expectedMount.FsType != capMount.FsType {
   560  			multierror.Append(&err, fmt.Errorf(
   561  				"requested filesystem type %v, got %v",
   562  				expectedMount.FsType, capMount.FsType))
   563  			continue NEXT_CAP
   564  		}
   565  
   566  		for _, expectedFlag := range expectedMount.MountFlags {
   567  			var ok bool
   568  			for _, flag := range capMount.MountFlags {
   569  				if expectedFlag == flag {
   570  					ok = true
   571  					break
   572  				}
   573  			}
   574  			if !ok {
   575  				// mount flags can contain sensitive data, so we can't log details
   576  				multierror.Append(&err, fmt.Errorf(
   577  					"requested mount flags did not match available capabilities"))
   578  				continue NEXT_CAP
   579  			}
   580  		}
   581  
   582  		return nil
   583  	}
   584  	return err.ErrorOrNil()
   585  }
   586  
   587  func (c *client) ControllerCreateSnapshot(ctx context.Context, req *ControllerCreateSnapshotRequest, opts ...grpc.CallOption) (*ControllerCreateSnapshotResponse, error) {
   588  	if err := c.ensureConnected(ctx); err != nil {
   589  		return nil, err
   590  	}
   591  
   592  	err := req.Validate()
   593  	if err != nil {
   594  		return nil, err
   595  	}
   596  	creq := req.ToCSIRepresentation()
   597  	resp, err := c.controllerClient.CreateSnapshot(ctx, creq, opts...)
   598  
   599  	// these standard gRPC error codes are overloaded with CSI-specific
   600  	// meanings, so translate them into user-understandable terms
   601  	// https://github.com/container-storage-interface/spec/blob/master/spec.md#createsnapshot-errors
   602  	if err != nil {
   603  		code := status.Code(err)
   604  		switch code {
   605  		case codes.AlreadyExists:
   606  			return nil, fmt.Errorf(
   607  				"snapshot %q already exists but is incompatible with volume ID %q: %v",
   608  				req.Name, req.VolumeID, err)
   609  		case codes.Aborted:
   610  			return nil, fmt.Errorf(
   611  				"snapshot %q is already pending: %v",
   612  				req.Name, err)
   613  		case codes.ResourceExhausted:
   614  			return nil, fmt.Errorf(
   615  				"storage provider does not have enough space for this snapshot: %v", err)
   616  		case codes.Internal:
   617  			return nil, fmt.Errorf(
   618  				"controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   619  		}
   620  		return nil, err
   621  	}
   622  
   623  	snap := resp.GetSnapshot()
   624  	return &ControllerCreateSnapshotResponse{
   625  		Snapshot: &Snapshot{
   626  			ID:             snap.GetSnapshotId(),
   627  			SourceVolumeID: snap.GetSourceVolumeId(),
   628  			SizeBytes:      snap.GetSizeBytes(),
   629  			CreateTime:     snap.GetCreationTime().GetSeconds(),
   630  			IsReady:        snap.GetReadyToUse(),
   631  		},
   632  	}, nil
   633  }
   634  
   635  func (c *client) ControllerDeleteSnapshot(ctx context.Context, req *ControllerDeleteSnapshotRequest, opts ...grpc.CallOption) error {
   636  	if err := c.ensureConnected(ctx); err != nil {
   637  		return err
   638  	}
   639  
   640  	err := req.Validate()
   641  	if err != nil {
   642  		return err
   643  	}
   644  	creq := req.ToCSIRepresentation()
   645  	_, err = c.controllerClient.DeleteSnapshot(ctx, creq, opts...)
   646  
   647  	// these standard gRPC error codes are overloaded with CSI-specific
   648  	// meanings, so translate them into user-understandable terms
   649  	// https://github.com/container-storage-interface/spec/blob/master/spec.md#deletesnapshot-errors
   650  	if err != nil {
   651  		code := status.Code(err)
   652  		switch code {
   653  		case codes.FailedPrecondition:
   654  			return fmt.Errorf(
   655  				"snapshot %q could not be deleted because it is in use: %v",
   656  				req.SnapshotID, err)
   657  		case codes.Aborted:
   658  			return fmt.Errorf("snapshot %q has a pending operation: %v", req.SnapshotID, err)
   659  		case codes.Internal:
   660  			return fmt.Errorf(
   661  				"controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   662  		}
   663  		return err
   664  	}
   665  
   666  	return nil
   667  }
   668  
   669  func (c *client) ControllerListSnapshots(ctx context.Context, req *ControllerListSnapshotsRequest, opts ...grpc.CallOption) (*ControllerListSnapshotsResponse, error) {
   670  	if err := c.ensureConnected(ctx); err != nil {
   671  		return nil, err
   672  	}
   673  
   674  	err := req.Validate()
   675  	if err != nil {
   676  		return nil, err
   677  	}
   678  	creq := req.ToCSIRepresentation()
   679  	resp, err := c.controllerClient.ListSnapshots(ctx, creq, opts...)
   680  
   681  	// these standard gRPC error codes are overloaded with CSI-specific
   682  	// meanings, so translate them into user-understandable terms
   683  	// https://github.com/container-storage-interface/spec/blob/master/spec.md#listsnapshot-errors
   684  	if err != nil {
   685  		code := status.Code(err)
   686  		switch code {
   687  		case codes.Aborted:
   688  			return nil, fmt.Errorf(
   689  				"invalid starting token %q: %v", req.StartingToken, err)
   690  		case codes.Internal:
   691  			return nil, fmt.Errorf(
   692  				"controller plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   693  		}
   694  		return nil, err
   695  	}
   696  
   697  	return NewListSnapshotsResponse(resp), nil
   698  }
   699  
   700  //
   701  // Node Endpoints
   702  //
   703  
   704  func (c *client) NodeGetCapabilities(ctx context.Context) (*NodeCapabilitySet, error) {
   705  	if err := c.ensureConnected(ctx); err != nil {
   706  		return nil, err
   707  	}
   708  
   709  	// note: no grpc retries needed here, as this is called in
   710  	// fingerprinting and will get retried by the caller
   711  	resp, err := c.nodeClient.NodeGetCapabilities(ctx, &csipbv1.NodeGetCapabilitiesRequest{})
   712  	if err != nil {
   713  		return nil, err
   714  	}
   715  
   716  	return NewNodeCapabilitySet(resp), nil
   717  }
   718  
   719  func (c *client) NodeGetInfo(ctx context.Context) (*NodeGetInfoResponse, error) {
   720  	if err := c.ensureConnected(ctx); err != nil {
   721  		return nil, err
   722  	}
   723  
   724  	result := &NodeGetInfoResponse{}
   725  
   726  	// note: no grpc retries needed here, as this is called in
   727  	// fingerprinting and will get retried by the caller
   728  	resp, err := c.nodeClient.NodeGetInfo(ctx, &csipbv1.NodeGetInfoRequest{})
   729  	if err != nil {
   730  		return nil, err
   731  	}
   732  
   733  	if resp.GetNodeId() == "" {
   734  		return nil, fmt.Errorf("plugin failed to return nodeid")
   735  	}
   736  
   737  	result.NodeID = resp.GetNodeId()
   738  	result.MaxVolumes = resp.GetMaxVolumesPerNode()
   739  	if result.MaxVolumes == 0 {
   740  		// set safe default so that scheduler ignores this constraint when not set
   741  		result.MaxVolumes = math.MaxInt64
   742  	}
   743  
   744  	topo := resp.GetAccessibleTopology()
   745  	if topo != nil {
   746  		result.AccessibleTopology = &Topology{Segments: topo.Segments}
   747  	}
   748  
   749  	return result, nil
   750  }
   751  
   752  func (c *client) NodeStageVolume(ctx context.Context, req *NodeStageVolumeRequest, opts ...grpc.CallOption) error {
   753  	if err := c.ensureConnected(ctx); err != nil {
   754  		return err
   755  	}
   756  	err := req.Validate()
   757  	if err != nil {
   758  		return err
   759  	}
   760  
   761  	// NodeStageVolume's response contains no extra data. If err == nil, we were
   762  	// successful.
   763  	_, err = c.nodeClient.NodeStageVolume(ctx, req.ToCSIRepresentation(), opts...)
   764  	if err != nil {
   765  		code := status.Code(err)
   766  		switch code {
   767  		case codes.NotFound:
   768  			err = fmt.Errorf("volume %q could not be found: %v", req.ExternalID, err)
   769  		case codes.AlreadyExists:
   770  			err = fmt.Errorf(
   771  				"volume %q is already staged to %q but with incompatible capabilities for this request: %v",
   772  				req.ExternalID, req.StagingTargetPath, err)
   773  		case codes.FailedPrecondition:
   774  			err = fmt.Errorf("volume %q is already published on another node and does not have MULTI_NODE volume capability: %v",
   775  				req.ExternalID, err)
   776  		case codes.Internal:
   777  			err = fmt.Errorf("node plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   778  		}
   779  	}
   780  
   781  	return err
   782  }
   783  
   784  func (c *client) NodeUnstageVolume(ctx context.Context, volumeID string, stagingTargetPath string, opts ...grpc.CallOption) error {
   785  	if err := c.ensureConnected(ctx); err != nil {
   786  		return err
   787  	}
   788  	// These errors should not be returned during production use but exist as aids
   789  	// during Nomad development
   790  	if volumeID == "" {
   791  		return fmt.Errorf("missing volumeID")
   792  	}
   793  	if stagingTargetPath == "" {
   794  		return fmt.Errorf("missing stagingTargetPath")
   795  	}
   796  
   797  	req := &csipbv1.NodeUnstageVolumeRequest{
   798  		VolumeId:          volumeID,
   799  		StagingTargetPath: stagingTargetPath,
   800  	}
   801  
   802  	// NodeUnstageVolume's response contains no extra data. If err == nil, we were
   803  	// successful.
   804  	_, err := c.nodeClient.NodeUnstageVolume(ctx, req, opts...)
   805  	if err != nil {
   806  		code := status.Code(err)
   807  		switch code {
   808  		case codes.NotFound:
   809  			err = fmt.Errorf("%w: volume %q could not be found: %v",
   810  				structs.ErrCSIClientRPCIgnorable, volumeID, err)
   811  		case codes.Internal:
   812  			err = fmt.Errorf("node plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   813  		}
   814  	}
   815  
   816  	return err
   817  }
   818  
   819  func (c *client) NodePublishVolume(ctx context.Context, req *NodePublishVolumeRequest, opts ...grpc.CallOption) error {
   820  	if err := c.ensureConnected(ctx); err != nil {
   821  		return err
   822  	}
   823  	if err := req.Validate(); err != nil {
   824  		return fmt.Errorf("validation error: %v", err)
   825  	}
   826  
   827  	// NodePublishVolume's response contains no extra data. If err == nil, we were
   828  	// successful.
   829  	_, err := c.nodeClient.NodePublishVolume(ctx, req.ToCSIRepresentation(), opts...)
   830  	if err != nil {
   831  		code := status.Code(err)
   832  		switch code {
   833  		case codes.NotFound:
   834  			err = fmt.Errorf("volume %q could not be found: %v", req.ExternalID, err)
   835  		case codes.AlreadyExists:
   836  			err = fmt.Errorf(
   837  				"volume %q is already published at target path %q but with capabilities or a read_only setting incompatible with this request: %v",
   838  				req.ExternalID, req.TargetPath, err)
   839  		case codes.FailedPrecondition:
   840  			err = fmt.Errorf("volume %q is already published on another node and does not have MULTI_NODE volume capability: %v",
   841  				req.ExternalID, err)
   842  		case codes.Internal:
   843  			err = fmt.Errorf("node plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   844  		}
   845  	}
   846  	return err
   847  }
   848  
   849  func (c *client) NodeUnpublishVolume(ctx context.Context, volumeID, targetPath string, opts ...grpc.CallOption) error {
   850  	if err := c.ensureConnected(ctx); err != nil {
   851  		return err
   852  	}
   853  	// These errors should not be returned during production use but exist as aids
   854  	// during Nomad development
   855  	if volumeID == "" {
   856  		return fmt.Errorf("missing volumeID")
   857  	}
   858  	if targetPath == "" {
   859  		return fmt.Errorf("missing targetPath")
   860  	}
   861  
   862  	req := &csipbv1.NodeUnpublishVolumeRequest{
   863  		VolumeId:   volumeID,
   864  		TargetPath: targetPath,
   865  	}
   866  
   867  	// NodeUnpublishVolume's response contains no extra data. If err == nil, we were
   868  	// successful.
   869  	_, err := c.nodeClient.NodeUnpublishVolume(ctx, req, opts...)
   870  	if err != nil {
   871  		code := status.Code(err)
   872  		switch code {
   873  		case codes.NotFound:
   874  			err = fmt.Errorf("%w: volume %q could not be found: %v",
   875  				structs.ErrCSIClientRPCIgnorable, volumeID, err)
   876  		case codes.Internal:
   877  			err = fmt.Errorf("node plugin returned an internal error, check the plugin allocation logs for more information: %v", err)
   878  		}
   879  	}
   880  
   881  	return err
   882  }