github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/pluginmanager/csimanager/volume.go

package csimanager

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/mount"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
)

var _ VolumeMounter = &volumeManager{}

const (
	DefaultMountActionTimeout = 2 * time.Minute
	StagingDirName            = "staging"
	AllocSpecificDirName      = "per-alloc"
)

// volumeManager handles the state of attached volumes for a given CSI Plugin.
//
// volumeManagers outlive the lifetime of a given allocation as volumes may be
// shared by multiple allocations on the same node.
//
// volumes are tracked by an enriched volume usage struct, as the CSI spec
// requires slightly different handling depending on the usage model.
type volumeManager struct {
	logger  hclog.Logger
	eventer TriggerNodeEvent
	plugin  csi.CSIPlugin

	usageTracker *volumeUsageTracker

	// mountRoot is the root directory where plugin directories and mounts may
	// be created, e.g. /opt/nomad.d/statedir/csi/my-csi-plugin/
	mountRoot string

	// containerMountPoint is the location _inside_ the plugin container that
	// the `mountRoot` is bound into.
	containerMountPoint string

	// requiresStaging indicates whether the plugin requires the volume manager
	// to call the NodeStageVolume and NodeUnstageVolume RPCs during setup and
	// teardown
	requiresStaging bool
}

func newVolumeManager(logger hclog.Logger, eventer TriggerNodeEvent, plugin csi.CSIPlugin, rootDir, containerRootDir string, requiresStaging bool) *volumeManager {
	return &volumeManager{
		logger:              logger.Named("volume_manager"),
		eventer:             eventer,
		plugin:              plugin,
		mountRoot:           rootDir,
		containerMountPoint: containerRootDir,
		requiresStaging:     requiresStaging,
		usageTracker:        newVolumeUsageTracker(),
	}
}

func (v *volumeManager) stagingDirForVolume(root string, volID string, usage *UsageOptions) string {
	return filepath.Join(root, StagingDirName, volID, usage.ToFS())
}

func (v *volumeManager) allocDirForVolume(root string, volID, allocID string) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID)
}

func (v *volumeManager) targetForVolume(root string, volID, allocID string, usage *UsageOptions) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID, usage.ToFS())
}
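// For illustration, with a hypothetical volume "vol0" claimed by a hypothetical
// allocation "alloc0", the path helpers above lay directories out under
// mountRoot as:
//
//	<mountRoot>/staging/vol0/<usage.ToFS()>            (stagingDirForVolume)
//	<mountRoot>/per-alloc/alloc0/vol0                  (allocDirForVolume)
//	<mountRoot>/per-alloc/alloc0/vol0/<usage.ToFS()>   (targetForVolume)
//
// where usage.ToFS() encodes the claim's usage mode, so different usage modes
// of the same volume get distinct staging and target directories.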
// ensureStagingDir attempts to create a directory for use when staging a volume
// and then validates that the path is not already a mount point for e.g. an
// existing volume stage.
//
// Returns the staging path, whether it is a pre-existing mountpoint, and any
// error that occurred.
func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOptions) (string, bool, error) {
	stagingPath := v.stagingDirForVolume(v.mountRoot, vol.ID, usage)

	// Make the staging path, owned by the Nomad User
	if err := os.MkdirAll(stagingPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create staging directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(stagingPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return stagingPath, !isNotMount, nil
}

// ensureAllocDir attempts to create a directory for use when publishing a volume
// and then validates that the path is not already a mount point (e.g. when
// reattaching to existing allocs).
//
// Returns the publish path, whether it is a pre-existing mountpoint, and any
// error that occurred.
func (v *volumeManager) ensureAllocDir(vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (string, bool, error) {
	allocPath := v.allocDirForVolume(v.mountRoot, vol.ID, alloc.ID)

	// Make the alloc path, owned by the Nomad User
	if err := os.MkdirAll(allocPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create allocation directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that the target is not already a mount point
	targetPath := v.targetForVolume(v.mountRoot, vol.ID, alloc.ID, usage)

	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(targetPath)

	switch {
	case errors.Is(err, os.ErrNotExist):
		// ignore; path does not exist and as such is not a mount
	case err != nil:
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return targetPath, !isNotMount, nil
}

func volumeCapability(vol *structs.CSIVolume, usage *UsageOptions) (*csi.VolumeCapability, error) {
	var opts *structs.CSIMountOptions
	if vol.MountOptions == nil {
		opts = usage.MountOptions
	} else {
		opts = vol.MountOptions.Copy()
		opts.Merge(usage.MountOptions)
	}

	capability, err := csi.VolumeCapabilityFromStructs(usage.AttachmentMode, usage.AccessMode, opts)
	if err != nil {
		return nil, err
	}

	return capability, nil
}
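// A minimal sketch of the option merge in volumeCapability, assuming the FSType
// and MountFlags fields on structs.CSIMountOptions and purely illustrative
// values: per-claim options are merged over a copy of the volume's options, so
// the claim wins where the two overlap.
//
//	vol.MountOptions   = &structs.CSIMountOptions{FSType: "ext4"}
//	usage.MountOptions = &structs.CSIMountOptions{MountFlags: []string{"noatime"}}
//	// merged options handed to csi.VolumeCapabilityFromStructs:
//	//   FSType: "ext4", MountFlags: ["noatime"]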
// stageVolume prepares a volume for use by allocations. When a plugin exposes
// the STAGE_UNSTAGE_VOLUME capability it MUST be called once-per-volume for a
// given usage mode before the volume can be NodePublish-ed.
func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume, usage *UsageOptions, publishContext map[string]string) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Preparing volume staging environment")
	hostStagingPath, isMount, err := v.ensureStagingDir(vol, usage)
	if err != nil {
		return err
	}
	pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)

	logger.Trace("Volume staging environment", "pre-existing_mount", isMount, "host_staging_path", hostStagingPath, "plugin_staging_path", pluginStagingPath)

	if isMount {
		logger.Debug("re-using existing staging mount for volume", "staging_path", hostStagingPath)
		return nil
	}

	capability, err := volumeCapability(vol, usage)
	if err != nil {
		return err
	}

	req := &csi.NodeStageVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		VolumeCapability:  capability,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	}

	// CSI NodeStageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeStageVolume(ctx, req,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

func (v *volumeManager) publishVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (*MountInfo, error) {
	logger := hclog.FromContext(ctx)
	var pluginStagingPath string
	if v.requiresStaging {
		pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
	}

	hostTargetPath, isMount, err := v.ensureAllocDir(vol, alloc, usage)
	if err != nil {
		return nil, err
	}
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage)

	if isMount {
		logger.Debug("Re-using existing published volume for allocation")
		return &MountInfo{Source: hostTargetPath}, nil
	}

	capabilities, err := volumeCapability(vol, usage)
	if err != nil {
		return nil, err
	}

	// CSI NodePublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	err = v.plugin.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		TargetPath:        pluginTargetPath,
		VolumeCapability:  capabilities,
		Readonly:          usage.ReadOnly,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	},
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	return &MountInfo{Source: hostTargetPath}, err
}
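// Note the host/container path split in stageVolume and publishVolume above:
// directories are created and checked under mountRoot on the host (via
// ensureStagingDir and ensureAllocDir), while the paths handed to the plugin
// in the NodeStageVolume/NodePublishVolume requests are rebuilt under
// containerMountPoint, the location inside the plugin container where
// mountRoot is bound:
//
//	hostTargetPath   := v.targetForVolume(v.mountRoot, vol.ID, alloc.ID, usage)           // created/checked on the host
//	pluginTargetPath := v.targetForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage) // sent to the plugin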
// MountVolume performs the steps required for using a given volume
// configuration for the provided allocation.
// It is passed the publishContext from remote attachment, and specific usage
// modes from the CSI Hook.
// It then uses this state to stage and publish the volume as required for use
// by the given allocation.
func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (mountInfo *MountInfo, err error) {
	logger := v.logger.With("volume_id", vol.ID, "alloc_id", alloc.ID)
	ctx = hclog.WithContext(ctx, logger)

	if v.requiresStaging {
		err = v.stageVolume(ctx, vol, usage, publishContext)
	}

	if err == nil {
		mountInfo, err = v.publishVolume(ctx, vol, alloc, usage, publishContext)
	}

	if err == nil {
		v.usageTracker.Claim(alloc.ID, vol.ID, usage)
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Mount volume").
		AddDetail("volume_id", vol.ID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return mountInfo, err
}

// unstageVolume is the inverse operation of `stageVolume` and must be called
// once for each staging path that a volume has been staged under.
// It is safe to call multiple times and a plugin is required to return OK if
// the volume has been unstaged or was never staged on the node.
func (v *volumeManager) unstageVolume(ctx context.Context, volID, remoteID string, usage *UsageOptions) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Unstaging volume")
	stagingPath := v.stagingDirForVolume(v.containerMountPoint, volID, usage)

	// CSI NodeUnstageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeUnstageVolume(ctx,
		remoteID,
		stagingPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

func combineErrors(maybeErrs ...error) error {
	var result *multierror.Error
	for _, err := range maybeErrs {
		if err == nil {
			continue
		}

		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}
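// A hypothetical caller (for example the client's CSI hook) would pair the two
// exported entry points roughly like this, keeping the returned
// MountInfo.Source as the host path handed to the task:
//
//	mi, err := vm.MountVolume(ctx, vol, alloc, usage, publishContext)
//	if err != nil {
//		return err
//	}
//	// ... later, on teardown ...
//	err = vm.UnmountVolume(ctx, vol.ID, vol.RemoteID(), alloc.ID, usage)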
func (v *volumeManager) unpublishVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) error {
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, volID, allocID, usage)

	// CSI NodeUnpublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	rpcErr := v.plugin.NodeUnpublishVolume(ctx, remoteID, pluginTargetPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	hostTargetPath := v.targetForVolume(v.mountRoot, volID, allocID, usage)
	if _, err := os.Stat(hostTargetPath); os.IsNotExist(err) {
		if rpcErr != nil && strings.Contains(rpcErr.Error(), "no mount point") {
			// host target path was already destroyed, nothing to do here.
			// this helps us in the case that a previous GC attempt cleaned
			// up the volume on the node but the controller RPCs failed
			rpcErr = fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
		}
		return rpcErr
	}

	// Host target path was not cleaned up, attempt to do so here. If it's still
	// a mount then removing the dir will fail and we'll return any rpcErr and
	// the file error.
	rmErr := os.Remove(hostTargetPath)
	if rmErr != nil {
		return combineErrors(rpcErr, rmErr)
	}

	// We successfully removed the directory, so return any rpcErr that was
	// encountered, but because we got here it was probably flaky or the mount
	// was already cleaned up externally.
	return fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
}

func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) (err error) {
	logger := v.logger.With("volume_id", volID, "alloc_id", allocID)
	ctx = hclog.WithContext(ctx, logger)

	err = v.unpublishVolume(ctx, volID, remoteID, allocID, usage)

	if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		canRelease := v.usageTracker.Free(allocID, volID, usage)
		if v.requiresStaging && canRelease {
			err = v.unstageVolume(ctx, volID, remoteID, usage)
		}
	}

	if errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		logger.Trace("unmounting volume failed with ignorable error", "error", err)
		err = nil
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Unmount volume").
		AddDetail("volume_id", volID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return err
}
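// A minimal sketch of how the ErrCSIClientRPCIgnorable wrapping composes with
// the standard errors package: unpublishVolume wraps "already gone" RPC
// failures with fmt.Errorf("%w: ...", structs.ErrCSIClientRPCIgnorable, ...),
// so callers can detect and swallow them the way UnmountVolume does:
//
//	if errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
//		// the volume is already unpublished on this node; treat as success
//		err = nil
//	}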