github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/csi_hook.go

package allocrunner

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
)

// csiHook will wait for remote csi volumes to be attached to the host before
// continuing.
//
// It is a noop for allocs that do not depend on CSI Volumes.
type csiHook struct {
	alloc      *structs.Allocation
	logger     hclog.Logger
	csimanager csimanager.Manager

	// interfaces implemented by the allocRunner
	rpcClient            RPCer
	taskCapabilityGetter taskCapabilityGetter
	updater              hookResourceSetter

	nodeSecret         string
	volumeRequests     map[string]*volumeAndRequest
	minBackoffInterval time.Duration
	maxBackoffInterval time.Duration
	maxBackoffDuration time.Duration

	shutdownCtx      context.Context
	shutdownCancelFn context.CancelFunc
}

// implemented by allocrunner
type taskCapabilityGetter interface {
	GetTaskDriverCapabilities(string) (*drivers.Capabilities, error)
}

func newCSIHook(alloc *structs.Allocation, logger hclog.Logger, csi csimanager.Manager, rpcClient RPCer, taskCapabilityGetter taskCapabilityGetter, updater hookResourceSetter, nodeSecret string) *csiHook {

	shutdownCtx, shutdownCancelFn := context.WithCancel(context.Background())

	return &csiHook{
		alloc:                alloc,
		logger:               logger.Named("csi_hook"),
		csimanager:           csi,
		rpcClient:            rpcClient,
		taskCapabilityGetter: taskCapabilityGetter,
		updater:              updater,
		nodeSecret:           nodeSecret,
		volumeRequests:       map[string]*volumeAndRequest{},
		minBackoffInterval:   time.Second,
		maxBackoffInterval:   time.Minute,
		maxBackoffDuration:   time.Hour * 24,
		shutdownCtx:          shutdownCtx,
		shutdownCancelFn:     shutdownCancelFn,
	}
}

func (c *csiHook) Name() string {
	return "csi_hook"
}

func (c *csiHook) Prerun() error {
	if !c.shouldRun() {
		return nil
	}

	volumes, err := c.claimVolumesFromAlloc()
	if err != nil {
		return fmt.Errorf("claim volumes: %v", err)
	}
	c.volumeRequests = volumes

	mounts := make(map[string]*csimanager.MountInfo, len(volumes))
	for alias, pair := range volumes {

		// We use this context only to attach hclog to the gRPC
		// context. The lifetime is the lifetime of the gRPC stream,
		// not specific RPC timeouts, but we manage the stream
		// lifetime via Close in the pluginmanager.
		mounter, err := c.csimanager.MounterForPlugin(c.shutdownCtx, pair.volume.PluginID)
		if err != nil {
			return err
		}

		usageOpts := &csimanager.UsageOptions{
			ReadOnly:       pair.request.ReadOnly,
			AttachmentMode: pair.request.AttachmentMode,
			AccessMode:     pair.request.AccessMode,
			MountOptions:   pair.request.MountOptions,
		}

		mountInfo, err := mounter.MountVolume(
			c.shutdownCtx, pair.volume, c.alloc, usageOpts, pair.publishContext)
		if err != nil {
			return err
		}

		mounts[alias] = mountInfo
	}

	res := c.updater.GetAllocHookResources()
	res.CSIMounts = mounts
	c.updater.SetAllocHookResources(res)

	return nil
}

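// Note (editorial, not part of the upstream file): after a successful Prerun,
// the alloc hook resources hold one csimanager.MountInfo per volume alias in
// res.CSIMounts; downstream task-runner hooks read these entries to build the
// host-to-task mounts for tasks that request the corresponding volumes.
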
// Postrun sends an RPC to the server to unpublish the volume. This may
// forward client RPCs to the node plugins or to the controller plugins,
// depending on whether other allocations on this node have claims on this
// volume.
func (c *csiHook) Postrun() error {
	if !c.shouldRun() {
		return nil
	}

	var wg sync.WaitGroup
	errs := make(chan error, len(c.volumeRequests))

	for _, pair := range c.volumeRequests {
		wg.Add(1)
		// CSI RPCs can potentially take a long time. Split the work
		// into goroutines so that operators could potentially reuse
		// one of a set of volumes
		go func(pair *volumeAndRequest) {
			defer wg.Done()
			err := c.unmountImpl(pair)
			if err != nil {
				// we can recover an unmount failure if the operator
				// brings the plugin back up, so retry every few minutes
				// but eventually give up. Don't block shutdown so that
				// we don't block shutting down the client in -dev mode
				go func(pair *volumeAndRequest) {
					err := c.unmountWithRetry(pair)
					if err != nil {
						c.logger.Error("volume could not be unmounted")
					}
					err = c.unpublish(pair)
					if err != nil {
						c.logger.Error("volume could not be unpublished")
					}
				}(pair)
			}

			// we can't recover from this RPC error client-side; the
			// volume claim GC job will have to clean up for us once
			// the allocation is marked terminal
			errs <- c.unpublish(pair)
		}(pair)
	}

	wg.Wait()
	close(errs) // so we don't block waiting if there were no errors

	var mErr *multierror.Error
	for err := range errs {
		mErr = multierror.Append(mErr, err)
	}

	return mErr.ErrorOrNil()
}

type volumeAndRequest struct {
	volume  *structs.CSIVolume
	request *structs.VolumeRequest

	// When volumeAndRequest was returned from a volume claim, this field will be
	// populated for plugins that require it.
	publishContext map[string]string
}

// claimVolumesFromAlloc is used by the pre-run hook to fetch all of the volume
// metadata and claim it for use by this alloc/node at the same time.
func (c *csiHook) claimVolumesFromAlloc() (map[string]*volumeAndRequest, error) {
	result := make(map[string]*volumeAndRequest)
	tg := c.alloc.Job.LookupTaskGroup(c.alloc.TaskGroup)
	supportsVolumes := false

	for _, task := range tg.Tasks {
		caps, err := c.taskCapabilityGetter.GetTaskDriverCapabilities(task.Name)
		if err != nil {
			return nil, fmt.Errorf("could not validate task driver capabilities: %v", err)
		}

		if caps.MountConfigs == drivers.MountConfigSupportNone {
			continue
		}

		supportsVolumes = true
		break
	}

	if !supportsVolumes {
		return nil, fmt.Errorf("no task supports CSI")
	}

	// Initially, populate the result map with all of the requests
	for alias, volumeRequest := range tg.Volumes {
		if volumeRequest.Type == structs.VolumeTypeCSI {
			result[alias] = &volumeAndRequest{request: volumeRequest}
		}
	}

	// Iterate over the result map and upsert the volume field as each volume gets
	// claimed by the server.
	for alias, pair := range result {
		claimType := structs.CSIVolumeClaimWrite
		if pair.request.ReadOnly {
			claimType = structs.CSIVolumeClaimRead
		}

		source := pair.request.Source
		if pair.request.PerAlloc {
			source = source + structs.AllocSuffix(c.alloc.Name)
		}

		req := &structs.CSIVolumeClaimRequest{
			VolumeID:       source,
			AllocationID:   c.alloc.ID,
			NodeID:         c.alloc.NodeID,
			Claim:          claimType,
			AccessMode:     pair.request.AccessMode,
			AttachmentMode: pair.request.AttachmentMode,
			WriteRequest: structs.WriteRequest{
				Region:    c.alloc.Job.Region,
				Namespace: c.alloc.Job.Namespace,
				AuthToken: c.nodeSecret,
			},
		}

		resp, err := c.claimWithRetry(req)
		if err != nil {
			return nil, fmt.Errorf("could not claim volume %s: %w", req.VolumeID, err)
		}
		if resp.Volume == nil {
			return nil, fmt.Errorf("Unexpected nil volume returned for ID: %v", pair.request.Source)
		}

		result[alias].request = pair.request
		result[alias].volume = resp.Volume
		result[alias].publishContext = resp.PublishContext
	}

	return result, nil
}

// claimWithRetry tries to claim the volume on the server, retrying
// with exponential backoff capped to a maximum interval
func (c *csiHook) claimWithRetry(req *structs.CSIVolumeClaimRequest) (*structs.CSIVolumeClaimResponse, error) {

	ctx, cancel := context.WithTimeout(c.shutdownCtx, c.maxBackoffDuration)
	defer cancel()

	var resp structs.CSIVolumeClaimResponse
	var err error
	backoff := c.minBackoffInterval
	t, stop := helper.NewSafeTimer(0)
	defer stop()
	for {
		select {
		case <-ctx.Done():
			return nil, err
		case <-t.C:
		}

		err = c.rpcClient.RPC("CSIVolume.Claim", req, &resp)
		if err == nil {
			break
		}

		if !isRetryableClaimRPCError(err) {
			break
		}

		if backoff < c.maxBackoffInterval {
			backoff = backoff * 2
			if backoff > c.maxBackoffInterval {
				backoff = c.maxBackoffInterval
			}
		}
		c.logger.Debug(
			"volume could not be claimed because it is in use", "retry_in", backoff)
		t.Reset(backoff)
	}
	return &resp, err
}

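// Illustrative sketch (not part of the upstream file): claimWithRetry above and
// unmountWithRetry below share the same capped exponential backoff shape. The
// hypothetical helper below isolates that pattern; retryFn reports whether the
// loop should stop (success or a non-retryable error). The caller would pass a
// closure that performs the RPC and classifies its error, and bound the total
// retry time through ctx, as the methods above do with maxBackoffDuration.
func retryWithBackoff(ctx context.Context, minInterval, maxInterval time.Duration, retryFn func() (stop bool)) {
	backoff := minInterval
	t, stop := helper.NewSafeTimer(0)
	defer stop()
	for {
		// wait for either the context to expire or the backoff timer to fire
		select {
		case <-ctx.Done():
			return
		case <-t.C:
		}

		if retryFn() {
			return
		}

		// double the wait, but never sleep longer than the cap
		if backoff < maxInterval {
			backoff = backoff * 2
			if backoff > maxInterval {
				backoff = maxInterval
			}
		}
		t.Reset(backoff)
	}
}
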
// isRetryableClaimRPCError looks for errors where we need to retry
// with backoff because we expect them to be eventually resolved.
func isRetryableClaimRPCError(err error) bool {

	// note: because these errors are returned via RPC which breaks error
	// wrapping, we can't check with errors.Is and need to read the string
	errMsg := err.Error()
	if strings.Contains(errMsg, structs.ErrCSIVolumeMaxClaims.Error()) {
		return true
	}
	if strings.Contains(errMsg, structs.ErrCSIClientRPCRetryable.Error()) {
		return true
	}
	if strings.Contains(errMsg, "no servers") {
		return true
	}
	if strings.Contains(errMsg, structs.ErrNoLeader.Error()) {
		return true
	}
	return false
}

func (c *csiHook) shouldRun() bool {
	tg := c.alloc.Job.LookupTaskGroup(c.alloc.TaskGroup)
	for _, vol := range tg.Volumes {
		if vol.Type == structs.VolumeTypeCSI {
			return true
		}
	}

	return false
}

func (c *csiHook) unpublish(pair *volumeAndRequest) error {

	mode := structs.CSIVolumeClaimRead
	if !pair.request.ReadOnly {
		mode = structs.CSIVolumeClaimWrite
	}

	source := pair.request.Source
	if pair.request.PerAlloc {
		// NOTE: PerAlloc can't be set if we have canaries
		source = source + structs.AllocSuffix(c.alloc.Name)
	}

	req := &structs.CSIVolumeUnpublishRequest{
		VolumeID: source,
		Claim: &structs.CSIVolumeClaim{
			AllocationID: c.alloc.ID,
			NodeID:       c.alloc.NodeID,
			Mode:         mode,
			State:        structs.CSIVolumeClaimStateUnpublishing,
		},
		WriteRequest: structs.WriteRequest{
			Region:    c.alloc.Job.Region,
			Namespace: c.alloc.Job.Namespace,
			AuthToken: c.nodeSecret,
		},
	}

	return c.rpcClient.RPC("CSIVolume.Unpublish",
		req, &structs.CSIVolumeUnpublishResponse{})

}

// unmountWithRetry tries to unmount/unstage the volume, retrying with
// exponential backoff capped to a maximum interval
func (c *csiHook) unmountWithRetry(pair *volumeAndRequest) error {

	ctx, cancel := context.WithTimeout(c.shutdownCtx, c.maxBackoffDuration)
	defer cancel()
	var err error
	backoff := c.minBackoffInterval
	t, stop := helper.NewSafeTimer(0)
	defer stop()
	for {
		select {
		case <-ctx.Done():
			return err
		case <-t.C:
		}

		err = c.unmountImpl(pair)
		if err == nil {
			break
		}

		if backoff < c.maxBackoffInterval {
			backoff = backoff * 2
			if backoff > c.maxBackoffInterval {
				backoff = c.maxBackoffInterval
			}
		}
		c.logger.Debug("volume could not be unmounted", "retry_in", backoff)
		t.Reset(backoff)
	}
	return nil
}

// unmountImpl implements the call to the CSI plugin manager to
// unmount the volume. Each retry will write an "Unmount volume"
// NodeEvent
func (c *csiHook) unmountImpl(pair *volumeAndRequest) error {

	mounter, err := c.csimanager.MounterForPlugin(c.shutdownCtx, pair.volume.PluginID)
	if err != nil {
		return err
	}

	usageOpts := &csimanager.UsageOptions{
		ReadOnly:       pair.request.ReadOnly,
		AttachmentMode: pair.request.AttachmentMode,
		AccessMode:     pair.request.AccessMode,
		MountOptions:   pair.request.MountOptions,
	}

	return mounter.UnmountVolume(c.shutdownCtx,
		pair.volume.ID, pair.volume.RemoteID(), c.alloc.ID, usageOpts)
}

// Shutdown will get called when the client is gracefully
// stopping. Cancel our shutdown context so that we don't block client
// shutdown while in the CSI RPC retry loop.
func (c *csiHook) Shutdown() {
	c.logger.Trace("shutting down hook")
	c.shutdownCancelFn()
}

// Destroy will get called when an allocation gets GC'd on the client
// or when a -dev mode client is stopped. Cancel our shutdown context
// so that we don't block client shutdown while in the CSI RPC retry
// loop.
func (c *csiHook) Destroy() {
	c.logger.Trace("destroying hook")
	c.shutdownCancelFn()
}
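
// Illustrative sketch (not part of the upstream file): the fan-out/fan-in
// shape used by Postrun above, shown in isolation with a hypothetical helper.
// Each worker sends exactly one result into a channel buffered to len(work),
// so Wait, close, and the collection loop can never block; nil results are
// dropped by multierror.Append.
func collectErrors(work []func() error) error {
	var wg sync.WaitGroup
	errs := make(chan error, len(work))

	for _, fn := range work {
		wg.Add(1)
		go func(fn func() error) {
			defer wg.Done()
			errs <- fn()
		}(fn)
	}

	wg.Wait()
	close(errs)

	var mErr *multierror.Error
	for err := range errs {
		mErr = multierror.Append(mErr, err)
	}
	return mErr.ErrorOrNil()
}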