github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/pluginmanager/csimanager/volume.go

package csimanager

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/mount"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
)

var _ VolumeMounter = &volumeManager{}

const (
	DefaultMountActionTimeout = 2 * time.Minute
	StagingDirName            = "staging"
	AllocSpecificDirName      = "per-alloc"
)

// volumeManager handles the state of attached volumes for a given CSI Plugin.
//
// volumeManagers outlive the lifetime of a given allocation as volumes may be
// shared by multiple allocations on the same node.
//
// volumes are tracked by an enriched volume usage struct, as the CSI Spec
// requires slightly different handling depending on the usage mode.
type volumeManager struct {
	logger  hclog.Logger
	eventer TriggerNodeEvent
	plugin  csi.CSIPlugin

	usageTracker *volumeUsageTracker

	// mountRoot is the root of where plugin directories and mounts may be created
	// e.g. /opt/nomad.d/statedir/csi/my-csi-plugin/
	mountRoot string

	// containerMountPoint is the location _inside_ the plugin container that the
	// `mountRoot` is bound into.
	containerMountPoint string

	// requiresStaging indicates whether the plugin requires the volume manager
	// to call the NodeStageVolume and NodeUnstageVolume RPCs during setup and teardown
	requiresStaging bool
}

func newVolumeManager(logger hclog.Logger, eventer TriggerNodeEvent, plugin csi.CSIPlugin, rootDir, containerRootDir string, requiresStaging bool) *volumeManager {
	return &volumeManager{
		logger:              logger.Named("volume_manager"),
		eventer:             eventer,
		plugin:              plugin,
		mountRoot:           rootDir,
		containerMountPoint: containerRootDir,
		requiresStaging:     requiresStaging,
		usageTracker:        newVolumeUsageTracker(),
	}
}

func (v *volumeManager) stagingDirForVolume(root string, volID string, usage *UsageOptions) string {
	return filepath.Join(root, StagingDirName, volID, usage.ToFS())
}

func (v *volumeManager) allocDirForVolume(root string, volID, allocID string, usage *UsageOptions) string {
	return filepath.Join(root, AllocSpecificDirName, allocID, volID, usage.ToFS())
}
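
// For illustration, the two path helpers above lay directories out under the
// given root roughly as follows, where <usage> is the filesystem-safe encoding
// returned by usage.ToFS():
//
//	<root>/staging/<volume-id>/<usage>               staging mounts, shared per usage mode
//	<root>/per-alloc/<alloc-id>/<volume-id>/<usage>  per-allocation publish targets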

// ensureStagingDir attempts to create a directory for use when staging a volume
// and then validates that the path is not already a mount point, e.g. an
// existing volume stage.
//
// Returns the staging path, whether the directory is a pre-existing mountpoint,
// and any error that occurred.
func (v *volumeManager) ensureStagingDir(vol *structs.CSIVolume, usage *UsageOptions) (string, bool, error) {
	stagingPath := v.stagingDirForVolume(v.mountRoot, vol.ID, usage)

	// Make the staging path, owned by the Nomad user
	if err := os.MkdirAll(stagingPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create staging directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(stagingPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return stagingPath, !isNotMount, nil
}

// ensureAllocDir attempts to create a directory for use when publishing a volume
// and then validates that the path is not already a mount point (e.g. when
// reattaching to existing allocs).
//
// Returns the publish path, whether the directory is a pre-existing mountpoint,
// and any error that occurred.
func (v *volumeManager) ensureAllocDir(vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions) (string, bool, error) {
	allocPath := v.allocDirForVolume(v.mountRoot, vol.ID, alloc.ID, usage)

	// Make the alloc path, owned by the Nomad user
	if err := os.MkdirAll(allocPath, 0700); err != nil && !os.IsExist(err) {
		return "", false, fmt.Errorf("failed to create allocation directory for volume (%s): %v", vol.ID, err)
	}

	// Validate that it is not already a mount point
	m := mount.New()
	isNotMount, err := m.IsNotAMountPoint(allocPath)
	if err != nil {
		return "", false, fmt.Errorf("mount point detection failed for volume (%s): %v", vol.ID, err)
	}

	return allocPath, !isNotMount, nil
}

func volumeCapability(vol *structs.CSIVolume, usage *UsageOptions) (*csi.VolumeCapability, error) {
	capability, err := csi.VolumeCapabilityFromStructs(vol.AttachmentMode, vol.AccessMode)
	if err != nil {
		return nil, err
	}

	var opts *structs.CSIMountOptions
	if vol.MountOptions == nil {
		opts = usage.MountOptions
	} else {
		opts = vol.MountOptions.Copy()
		opts.Merge(usage.MountOptions)
	}

	capability.MountVolume = opts

	return capability, nil
}

// stageVolume prepares a volume for use by allocations. When a plugin exposes
// the STAGE_UNSTAGE_VOLUME capability it MUST be called once-per-volume for a
// given usage mode before the volume can be NodePublish-ed.
func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume, usage *UsageOptions, publishContext map[string]string) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Preparing volume staging environment")
	hostStagingPath, isMount, err := v.ensureStagingDir(vol, usage)
	if err != nil {
		return err
	}
	pluginStagingPath := v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)

	logger.Trace("Volume staging environment", "pre-existing_mount", isMount, "host_staging_path", hostStagingPath, "plugin_staging_path", pluginStagingPath)

	if isMount {
		logger.Debug("re-using existing staging mount for volume", "staging_path", hostStagingPath)
		return nil
	}

	capability, err := volumeCapability(vol, usage)
	if err != nil {
		return err
	}

	// CSI NodeStageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
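	// Each attempt is capped at DefaultMountActionTimeout, and retries back
	// off exponentially from a 100ms base (see the grpc_retry options below).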
	return v.plugin.NodeStageVolume(ctx,
		vol.RemoteID(),
		publishContext,
		pluginStagingPath,
		capability,
		vol.Secrets,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

func (v *volumeManager) publishVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (*MountInfo, error) {
	logger := hclog.FromContext(ctx)
	var pluginStagingPath string
	if v.requiresStaging {
		pluginStagingPath = v.stagingDirForVolume(v.containerMountPoint, vol.ID, usage)
	}

	hostTargetPath, isMount, err := v.ensureAllocDir(vol, alloc, usage)
	if err != nil {
		return nil, err
	}
	pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, vol.ID, alloc.ID, usage)

	if isMount {
		logger.Debug("Re-using existing published volume for allocation")
		return &MountInfo{Source: hostTargetPath}, nil
	}

	capabilities, err := volumeCapability(vol, usage)
	if err != nil {
		return nil, err
	}

	// CSI NodePublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	err = v.plugin.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{
		ExternalID:        vol.RemoteID(),
		PublishContext:    publishContext,
		StagingTargetPath: pluginStagingPath,
		TargetPath:        pluginTargetPath,
		VolumeCapability:  capabilities,
		Readonly:          usage.ReadOnly,
		Secrets:           vol.Secrets,
	},
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	return &MountInfo{Source: hostTargetPath}, err
}

// MountVolume performs the steps required for using a given volume
// configuration for the provided allocation.
// It is passed the publishContext from remote attachment, and specific usage
// modes from the CSI Hook.
// It then uses this state to stage and publish the volume as required for use
// by the given allocation.
func (v *volumeManager) MountVolume(ctx context.Context, vol *structs.CSIVolume, alloc *structs.Allocation, usage *UsageOptions, publishContext map[string]string) (mountInfo *MountInfo, err error) {
	logger := v.logger.With("volume_id", vol.ID, "alloc_id", alloc.ID)
	ctx = hclog.WithContext(ctx, logger)

	if v.requiresStaging {
		err = v.stageVolume(ctx, vol, usage, publishContext)
	}

	if err == nil {
		mountInfo, err = v.publishVolume(ctx, vol, alloc, usage, publishContext)
	}

	if err == nil {
		v.usageTracker.Claim(alloc.ID, vol.ID, usage)
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Mount volume").
		AddDetail("volume_id", vol.ID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return mountInfo, err
}
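
// A successful MountVolume records a claim for (alloc, volume, usage) in the
// usageTracker; UnmountVolume below releases that claim and only unstages the
// volume once the last claim for a given usage mode has been freed.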

// unstageVolume is the inverse operation of `stageVolume` and must be called
// once for each staging path that a volume has been staged under.
// It is safe to call multiple times and a plugin is required to return OK if
// the volume has been unstaged or was never staged on the node.
func (v *volumeManager) unstageVolume(ctx context.Context, volID, remoteID string, usage *UsageOptions) error {
	logger := hclog.FromContext(ctx)
	logger.Trace("Unstaging volume")
	stagingPath := v.stagingDirForVolume(v.containerMountPoint, volID, usage)

	// CSI NodeUnstageVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	return v.plugin.NodeUnstageVolume(ctx,
		remoteID,
		stagingPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)
}

func combineErrors(maybeErrs ...error) error {
	var result *multierror.Error
	for _, err := range maybeErrs {
		if err == nil {
			continue
		}

		result = multierror.Append(result, err)
	}

	return result.ErrorOrNil()
}

func (v *volumeManager) unpublishVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) error {
	pluginTargetPath := v.allocDirForVolume(v.containerMountPoint, volID, allocID, usage)

	// CSI NodeUnpublishVolume errors for timeout, codes.Unavailable and
	// codes.ResourceExhausted are retried; all other errors are fatal.
	rpcErr := v.plugin.NodeUnpublishVolume(ctx, remoteID, pluginTargetPath,
		grpc_retry.WithPerRetryTimeout(DefaultMountActionTimeout),
		grpc_retry.WithMax(3),
		grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
	)

	hostTargetPath := v.allocDirForVolume(v.mountRoot, volID, allocID, usage)
	if _, err := os.Stat(hostTargetPath); os.IsNotExist(err) {
		if rpcErr != nil && strings.Contains(rpcErr.Error(), "no mount point") {
			// The host target path was already destroyed; nothing to do here.
			// This helps us in the case that a previous GC attempt cleaned
			// up the volume on the node but the controller RPCs failed.
			return nil
		}
		return rpcErr
	}

	// The host target path was not cleaned up; attempt to do so here. If it is
	// still a mount then removing the dir will fail and we'll return both the
	// rpcErr and the file error.
	rmErr := os.Remove(hostTargetPath)
	if rmErr != nil {
		return combineErrors(rpcErr, rmErr)
	}

	// We successfully removed the directory, so return any rpcErr that was
	// encountered; because we got here it was probably flaky or the mount was
	// cleaned up externally. We might want to just return `nil` here in the
	// future.
	return rpcErr
}

func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allocID string, usage *UsageOptions) (err error) {
	logger := v.logger.With("volume_id", volID, "alloc_id", allocID)
	ctx = hclog.WithContext(ctx, logger)

	err = v.unpublishVolume(ctx, volID, remoteID, allocID, usage)

	if err == nil {
		canRelease := v.usageTracker.Free(allocID, volID, usage)
		if v.requiresStaging && canRelease {
			err = v.unstageVolume(ctx, volID, remoteID, usage)
		}
	}

	event := structs.NewNodeEvent().
		SetSubsystem(structs.NodeEventSubsystemStorage).
		SetMessage("Unmount volume").
		AddDetail("volume_id", volID)
	if err == nil {
		event.AddDetail("success", "true")
	} else {
		event.AddDetail("success", "false")
		event.AddDetail("error", err.Error())
	}

	v.eventer(event)

	return err
}
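
// The sketch below is illustrative only: it shows roughly how a caller holding
// a csi.CSIPlugin client and a node-event callback might wire up a
// volumeManager and mount a volume for an allocation. The logger, eventer,
// plugin, paths, vol, alloc, usage, and publishContext values are assumptions
// supplied by the caller, not part of this package.
//
//	vm := newVolumeManager(logger, eventer, plugin,
//		"/opt/nomad.d/statedir/csi/my-csi-plugin", // host-side mountRoot
//		"/local/csi",                              // assumed containerMountPoint inside the plugin task
//		true,                                      // plugin advertises STAGE_UNSTAGE_VOLUME
//	)
//
//	mountInfo, err := vm.MountVolume(ctx, vol, alloc, usage, publishContext)
//	if err != nil {
//		return err
//	}
//	// hand mountInfo.Source to the task driver, then later release the claim:
//	err = vm.UnmountVolume(ctx, vol.ID, vol.RemoteID(), alloc.ID, usage)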