github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/pluginmanager/csimanager/volume.go

package csimanager

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/mount"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
)

var _ VolumeMounter = &volumeManager{}

const (
	DefaultMountActionTimeout = 2 * time.Minute
	StagingDirName            = "staging"
	AllocSpecificDirName      = "per-alloc"
)

// volumeManager handles the state of attached volumes for a given CSI Plugin.
//
// volumeManagers outlive the lifetime of a given allocation as volumes may be
// shared by multiple allocations on the same node.
//
// volumes are stored by an enriched volume usage struct as the CSI Spec requires
// slightly different usage based on the given usage model.
type volumeManager struct {
	logger  hclog.Logger
	eventer TriggerNodeEvent
	plugin  csi.CSIPlugin

	usageTracker *volumeUsageTracker

	// mountRoot is the root of where plugin directories and mounts may be created
	// e.g. /opt/nomad.d/statedir/csi/my-csi-plugin/
	mountRoot string

	// containerMountPoint is the location _inside_ the plugin container that the
	// `mountRoot` is bound in to.
	containerMountPoint string

	// requiresStaging shows whether the plugin requires that the volume manager
	// call the NodeStageVolume and NodeUnstageVolume RPCs during setup and teardown
	requiresStaging bool
}

// newVolumeManager returns a volumeManager for the given plugin, rooted at the
// given host and in-container mount paths.
func newVolumeManager(logger hclog.Logger, eventer TriggerNodeEvent, plugin csi.CSIPlugin, rootDir, containerRootDir string, requiresStaging bool) *volumeManager {
	return &volumeManager{
		logger:              logger.Named("volume_manager"),
		eventer:             eventer,
		plugin:              plugin,
		mountRoot:           rootDir,
		containerMountPoint: containerRootDir,
		requiresStaging:     requiresStaging,
		usageTracker:        newVolumeUsageTracker(),
	}
}

func (v *volumeManager) stagingDirForVolume(root string, volID string, usage *UsageOptions) string {
	return filepath.Join(root, StagingDirName, volID, usage.ToFS())
}

func (v *volumeManager) allocDirForVolume(root string, volID, allocID string) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID)
}

func (v *volumeManager) targetForVolume(root string, volID, allocID string, usage *UsageOptions) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID, usage.ToFS())
}

// ensureStagingDir attempts to create a directory for use when staging a volume
// and then validates that the path is not already a mount point for e.g. an
// existing volume stage.
//
// Returns the staging path, whether the directory is a pre-existing mountpoint,
// and any errors that occurred.
func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOptions) (string, bool, error) {
	stagingPath := v.stagingDirForVolume(v.mountRoot, vol.ID, usage)

	// Make the staging path, owned by the Nomad User
	if err := os.MkdirAll(stagingPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create staging directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(stagingPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return stagingPath, !isNotMount, nil
}

// ensureAllocDir attempts to create a directory for use when publishing a volume
// and then validates that the path is not already a mount point (e.g. when
// reattaching to existing allocs).
//
// Returns the publish path, whether the directory is a pre-existing mountpoint,
// and any errors that occurred.
func (v *volumeManager) ensureAllocDir(vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (string, bool, error) {
	allocPath := v.allocDirForVolume(v.mountRoot, vol.ID, alloc.ID)

	// Make the alloc path, owned by the Nomad User
	if err := os.MkdirAll(allocPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create allocation directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that the target is not already a mount point
	targetPath := v.targetForVolume(v.mountRoot, vol.ID, alloc.ID, usage)
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(targetPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return targetPath, !isNotMount, nil
}

// volumeCapability maps the volume's attachment and access modes, plus any
// volume- or usage-level mount options, into a CSI VolumeCapability.
func volumeCapability(vol *structs.CSIVolume, usage *UsageOptions) (*csi.VolumeCapability, error) {
	capability, err := csi.VolumeCapabilityFromStructs(vol.AttachmentMode, vol.AccessMode)
	if err != nil {
		return nil, err
	}

	var opts *structs.CSIMountOptions
	if vol.MountOptions == nil {
		opts = usage.MountOptions
	} else {
		opts = vol.MountOptions.Copy()
		opts.Merge(usage.MountOptions)
	}

	capability.MountVolume = opts

	return capability, nil
}

// stageVolume prepares a volume for use by allocations. When a plugin exposes
// the STAGE_UNSTAGE_VOLUME capability it MUST be called once-per-volume for a
// given usage mode before the volume can be NodePublish-ed.
func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume, usage *UsageOptions, publishContext map[string]string) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Preparing volume staging environment")
	hostStagingPath, isMount, err := v.ensureStagingDir(vol, usage)
	if err != nil {
		return err
	}
	pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)

	logger.Trace("Volume staging environment", "pre-existing_mount", isMount, "host_staging_path", hostStagingPath, "plugin_staging_path", pluginStagingPath)

	if isMount {
		logger.Debug("re-using existing staging mount for volume", "staging_path", hostStagingPath)
		return nil
	}

	capability, err := volumeCapability(vol, usage)
	if err != nil {
		return err
	}

	req := &csi.NodeStageVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		VolumeCapability:  capability,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	}

	// CSI NodeStageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeStageVolume(ctx, req,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

// publishVolume makes the volume available at the allocation-specific target
// path by calling NodePublishVolume, re-using an existing publish mount if one
// is already present.
func (v *volumeManager) publishVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (*MountInfo, error) {
	logger := hclog.FromContext(ctx)
	var pluginStagingPath string
	if v.requiresStaging {
		pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
	}

	hostTargetPath, isMount, err := v.ensureAllocDir(vol, alloc, usage)
	if err != nil {
		return nil, err
	}
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage)

	if isMount {
		logger.Debug("Re-using existing published volume for allocation")
		return &MountInfo{Source: hostTargetPath}, nil
	}

	capabilities, err := volumeCapability(vol, usage)
	if err != nil {
		return nil, err
	}

	// CSI NodePublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	err = v.plugin.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		TargetPath:        pluginTargetPath,
		VolumeCapability:  capabilities,
		Readonly:          usage.ReadOnly,
		Secrets:           vol.Secrets,
		VolumeContext:     vol.Context,
	},
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	return &MountInfo{Source: hostTargetPath}, err
}

// MountVolume performs the steps required for using a given volume
// configuration for the provided allocation.
// It is passed the publishContext from remote attachment, and specific usage
// modes from the CSI Hook.
// It then uses this state to stage and publish the volume as required for use
// by the given allocation.
func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (mountInfo *MountInfo, err error) {
	logger := v.logger.With("volume_id", vol.ID, "alloc_id", alloc.ID)
	ctx = hclog.WithContext(ctx, logger)

	if v.requiresStaging {
		err = v.stageVolume(ctx, vol, usage, publishContext)
	}

	if err == nil {
		mountInfo, err = v.publishVolume(ctx, vol, alloc, usage, publishContext)
	}

	if err == nil {
		v.usageTracker.Claim(alloc.ID, vol.ID, usage)
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Mount volume").
		AddDetail("volume_id", vol.ID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return mountInfo, err
}

// unstageVolume is the inverse operation of `stageVolume` and must be called
// once for each staging path that a volume has been staged under.
// It is safe to call multiple times and a plugin is required to return OK if
// the volume has been unstaged or was never staged on the node.
func (v *volumeManager) unstageVolume(ctx context.Context, volID, remoteID string, usage *UsageOptions) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Unstaging volume")
	stagingPath := v.stagingDirForVolume(v.containerMountPoint, volID, usage)

	// CSI NodeUnstageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeUnstageVolume(ctx,
		remoteID,
		stagingPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

// combineErrors merges any non-nil errors into a single multierror, returning
// nil if all of them are nil.
func combineErrors(maybeErrs ...error) error {
	var result *multierror.Error
	for _, err := range maybeErrs {
		if err == nil {
			continue
		}

		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}

// unpublishVolume is the inverse operation of `publishVolume`: it calls
// NodeUnpublishVolume and then cleans up the allocation-specific target path
// on the host, wrapping errors that are safe to ignore in
// structs.ErrCSIClientRPCIgnorable.
func (v *volumeManager) unpublishVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) error {
	pluginTargetPath := v.targetForVolume(v.containerMountPoint, volID, allocID, usage)

	// CSI NodeUnpublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	rpcErr := v.plugin.NodeUnpublishVolume(ctx, remoteID, pluginTargetPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	hostTargetPath := v.targetForVolume(v.mountRoot, volID, allocID, usage)
	if _, err := os.Stat(hostTargetPath); os.IsNotExist(err) {
		if rpcErr != nil && strings.Contains(rpcErr.Error(), "no mount point") {
			// host target path was already destroyed, nothing to do here.
			// this helps us in the case that a previous GC attempt cleaned
			// up the volume on the node but the controller RPCs failed
			rpcErr = fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
		}
		return rpcErr
	}

	// Host Target Path was not cleaned up, attempt to do so here. If it's still
	// a mount then removing the dir will fail and we'll return any rpcErr and the
	// file error.
	rmErr := os.Remove(hostTargetPath)
	if rmErr != nil {
		return combineErrors(rpcErr, rmErr)
	}

	// We successfully removed the directory, so return any rpcErr that was
	// encountered, but because we got here it was probably transient or the
	// mount was cleaned up externally.
	return fmt.Errorf("%w: %v", structs.ErrCSIClientRPCIgnorable, rpcErr)
}

// UnmountVolume unpublishes the volume for the given allocation and, when the
// plugin requires staging and no other allocation still holds a claim on the
// volume, unstages it as well. A node event is emitted with the result.
func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) (err error) {
	logger := v.logger.With("volume_id", volID, "alloc_id", allocID)
	ctx = hclog.WithContext(ctx, logger)

	err = v.unpublishVolume(ctx, volID, remoteID, allocID, usage)

	if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		canRelease := v.usageTracker.Free(allocID, volID, usage)
		if v.requiresStaging && canRelease {
			err = v.unstageVolume(ctx, volID, remoteID, usage)
		}
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Unmount volume").
		AddDetail("volume_id", volID)
	if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return err
}
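
// The sketch below is illustrative only and is not part of the upstream file:
// it shows, assuming a caller similar to the client's CSI hook, roughly how
// MountVolume and UnmountVolume pair up around a task's lifetime. mountAndRun
// and its runTask callback are hypothetical names introduced here for
// illustration.
func mountAndRun(ctx context.Context, vm *volumeManager, vol *structs.CSIVolume, alloc *structs.Allocation,
	usage *UsageOptions, publishContext map[string]string, runTask func(hostPath string) error) error {

	// Stage (if the plugin requires it) and publish the volume for this allocation.
	mountInfo, err := vm.MountVolume(ctx, vol, alloc, usage, publishContext)
	if err != nil {
		return err
	}

	// Hand the host-side mount path to the task, then unmount once it is done.
	taskErr := runTask(mountInfo.Source)

	unmountErr := vm.UnmountVolume(ctx, vol.ID, vol.RemoteID(), alloc.ID, usage)
	if unmountErr != nil && !errors.Is(unmountErr, structs.ErrCSIClientRPCIgnorable) {
		return combineErrors(taskErr, unmountErr)
	}
	return taskErr
}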