github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/csi_endpoint.go (about) 1 package client 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "time" 8 9 metrics "github.com/armon/go-metrics" 10 grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" 11 "github.com/hashicorp/nomad/client/dynamicplugins" 12 "github.com/hashicorp/nomad/client/pluginmanager/csimanager" 13 "github.com/hashicorp/nomad/client/structs" 14 nstructs "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/plugins/csi" 16 ) 17 18 // CSI endpoint is used for interacting with CSI plugins on a client. 19 // TODO: Submit metrics with labels to allow debugging per plugin perf problems. 20 type CSI struct { 21 c *Client 22 } 23 24 const ( 25 // CSIPluginRequestTimeout is the timeout that should be used when making reqs 26 // against CSI Plugins. It is copied from Kubernetes as an initial seed value. 27 // https://github.com/kubernetes/kubernetes/blob/e680ad7156f263a6d8129cc0117fda58602e50ad/pkg/volume/csi/csi_plugin.go#L52 28 CSIPluginRequestTimeout = 2 * time.Minute 29 ) 30 31 var ( 32 ErrPluginTypeError = errors.New("CSI Plugin loaded incorrectly") 33 ) 34 35 // ControllerValidateVolume is used during volume registration to validate 36 // that a volume exists and that the capabilities it was registered with are 37 // supported by the CSI Plugin and external volume configuration. 38 func (c *CSI) ControllerValidateVolume(req *structs.ClientCSIControllerValidateVolumeRequest, resp *structs.ClientCSIControllerValidateVolumeResponse) error { 39 defer metrics.MeasureSince([]string{"client", "csi_controller", "validate_volume"}, time.Now()) 40 41 if req.VolumeID == "" { 42 return errors.New("VolumeID is required") 43 } 44 45 if req.PluginID == "" { 46 return errors.New("PluginID is required") 47 } 48 49 plugin, err := c.findControllerPlugin(req.PluginID) 50 if err != nil { 51 // the server's view of the plugin health is stale, so let it know it 52 // should retry with another controller instance 53 return fmt.Errorf("%w: %v", nstructs.ErrCSIClientRPCRetryable, err) 54 } 55 defer plugin.Close() 56 57 csiReq, err := req.ToCSIRequest() 58 if err != nil { 59 return err 60 } 61 62 ctx, cancelFn := c.requestContext() 63 defer cancelFn() 64 65 // CSI ValidateVolumeCapabilities errors for timeout, codes.Unavailable and 66 // codes.ResourceExhausted are retried; all other errors are fatal. 67 return plugin.ControllerValidateCapabilities(ctx, csiReq, 68 grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), 69 grpc_retry.WithMax(3), 70 grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) 71 } 72 73 // ControllerAttachVolume is used to attach a volume from a CSI Cluster to 74 // the storage node provided in the request. 75 // 76 // The controller attachment flow currently works as follows: 77 // 1. Validate the volume request 78 // 2. Call ControllerPublishVolume on the CSI Plugin to trigger a remote attachment 79 // 80 // In the future this may be expanded to request dynamic secrets for attachment. 81 func (c *CSI) ControllerAttachVolume(req *structs.ClientCSIControllerAttachVolumeRequest, resp *structs.ClientCSIControllerAttachVolumeResponse) error { 82 defer metrics.MeasureSince([]string{"client", "csi_controller", "publish_volume"}, time.Now()) 83 plugin, err := c.findControllerPlugin(req.PluginID) 84 if err != nil { 85 // the server's view of the plugin health is stale, so let it know it 86 // should retry with another controller instance 87 return fmt.Errorf("%w: %v", nstructs.ErrCSIClientRPCRetryable, err) 88 } 89 defer plugin.Close() 90 91 // The following block of validation checks should not be reached on a 92 // real Nomad cluster as all of this data should be validated when registering 93 // volumes with the cluster. They serve as a defensive check before forwarding 94 // requests to plugins, and to aid with development. 95 96 if req.VolumeID == "" { 97 return errors.New("VolumeID is required") 98 } 99 100 if req.ClientCSINodeID == "" { 101 return errors.New("ClientCSINodeID is required") 102 } 103 104 csiReq, err := req.ToCSIRequest() 105 if err != nil { 106 return err 107 } 108 109 // Submit the request for a volume to the CSI Plugin. 110 ctx, cancelFn := c.requestContext() 111 defer cancelFn() 112 // CSI ControllerPublishVolume errors for timeout, codes.Unavailable and 113 // codes.ResourceExhausted are retried; all other errors are fatal. 114 cresp, err := plugin.ControllerPublishVolume(ctx, csiReq, 115 grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), 116 grpc_retry.WithMax(3), 117 grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) 118 if err != nil { 119 return err 120 } 121 122 resp.PublishContext = cresp.PublishContext 123 return nil 124 } 125 126 // ControllerDetachVolume is used to detach a volume from a CSI Cluster from 127 // the storage node provided in the request. 128 func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolumeRequest, resp *structs.ClientCSIControllerDetachVolumeResponse) error { 129 defer metrics.MeasureSince([]string{"client", "csi_controller", "unpublish_volume"}, time.Now()) 130 plugin, err := c.findControllerPlugin(req.PluginID) 131 if err != nil { 132 // the server's view of the plugin health is stale, so let it know it 133 // should retry with another controller instance 134 return fmt.Errorf("%w: %v", nstructs.ErrCSIClientRPCRetryable, err) 135 } 136 defer plugin.Close() 137 138 // The following block of validation checks should not be reached on a 139 // real Nomad cluster as all of this data should be validated when registering 140 // volumes with the cluster. They serve as a defensive check before forwarding 141 // requests to plugins, and to aid with development. 142 143 if req.VolumeID == "" { 144 return errors.New("VolumeID is required") 145 } 146 147 if req.ClientCSINodeID == "" { 148 return errors.New("ClientCSINodeID is required") 149 } 150 151 csiReq := req.ToCSIRequest() 152 153 // Submit the request for a volume to the CSI Plugin. 154 ctx, cancelFn := c.requestContext() 155 defer cancelFn() 156 // CSI ControllerUnpublishVolume errors for timeout, codes.Unavailable and 157 // codes.ResourceExhausted are retried; all other errors are fatal. 158 _, err = plugin.ControllerUnpublishVolume(ctx, csiReq, 159 grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout), 160 grpc_retry.WithMax(3), 161 grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond))) 162 if err != nil { 163 if errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) { 164 // if the controller detach previously happened but the server failed to 165 // checkpoint, we'll get an error from the plugin but can safely ignore it. 166 c.c.logger.Debug("could not unpublish volume: %v", err) 167 return nil 168 } 169 return err 170 } 171 return nil 172 } 173 174 // NodeDetachVolume is used to detach a volume from a CSI Cluster from 175 // the storage node provided in the request. 176 func (c *CSI) NodeDetachVolume(req *structs.ClientCSINodeDetachVolumeRequest, resp *structs.ClientCSINodeDetachVolumeResponse) error { 177 defer metrics.MeasureSince([]string{"client", "csi_node", "detach_volume"}, time.Now()) 178 179 // The following block of validation checks should not be reached on a 180 // real Nomad cluster. They serve as a defensive check before forwarding 181 // requests to plugins, and to aid with development. 182 if req.PluginID == "" { 183 return errors.New("PluginID is required") 184 } 185 if req.VolumeID == "" { 186 return errors.New("VolumeID is required") 187 } 188 if req.AllocID == "" { 189 return errors.New("AllocID is required") 190 } 191 192 ctx, cancelFn := c.requestContext() 193 defer cancelFn() 194 195 mounter, err := c.c.csimanager.MounterForPlugin(ctx, req.PluginID) 196 if err != nil { 197 return err 198 } 199 200 usageOpts := &csimanager.UsageOptions{ 201 ReadOnly: req.ReadOnly, 202 AttachmentMode: string(req.AttachmentMode), 203 AccessMode: string(req.AccessMode), 204 } 205 206 err = mounter.UnmountVolume(ctx, req.VolumeID, req.ExternalID, req.AllocID, usageOpts) 207 if err != nil && !errors.Is(err, nstructs.ErrCSIClientRPCIgnorable) { 208 // if the unmounting previously happened but the server failed to 209 // checkpoint, we'll get an error from Unmount but can safely 210 // ignore it. 211 return err 212 } 213 return nil 214 } 215 216 func (c *CSI) findControllerPlugin(name string) (csi.CSIPlugin, error) { 217 return c.findPlugin(dynamicplugins.PluginTypeCSIController, name) 218 } 219 220 func (c *CSI) findPlugin(ptype, name string) (csi.CSIPlugin, error) { 221 pIface, err := c.c.dynamicRegistry.DispensePlugin(ptype, name) 222 if err != nil { 223 return nil, err 224 } 225 226 plugin, ok := pIface.(csi.CSIPlugin) 227 if !ok { 228 return nil, ErrPluginTypeError 229 } 230 231 return plugin, nil 232 } 233 234 func (c *CSI) requestContext() (context.Context, context.CancelFunc) { 235 return context.WithTimeout(context.Background(), CSIPluginRequestTimeout) 236 }