github.com/hernad/nomad@v1.6.112/nomad/volumewatcher/volume_watcher.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package volumewatcher

import (
	"context"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hernad/nomad/helper"
	"github.com/hernad/nomad/nomad/state"
	"github.com/hernad/nomad/nomad/structs"
)

// volumeWatcher is used to watch a single volume and release its unused
// claims once the claiming allocations are terminal.
type volumeWatcher struct {
	// v is the volume being watched
	v *structs.CSIVolume

	// state is the state store that is watched for changes
	state *state.StateStore

	// rpc is the server interface for CSI client RPCs
	rpc CSIVolumeRPC

	// leaderAcl is the ACL token needed to send RPCs
	leaderAcl string

	logger      log.Logger
	shutdownCtx context.Context // parent context
	ctx         context.Context // own context
	exitFn      context.CancelFunc
	deleteFn    func()

	// quiescentTimeout is the time we wait until the volume has "settled"
	// before stopping the child watcher goroutines
	quiescentTimeout time.Duration

	// updateCh is triggered when there is an updated volume
	updateCh chan *structs.CSIVolume

	wLock   sync.RWMutex
	running bool
}

// newVolumeWatcher returns a volume watcher that is used to watch a single
// volume
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {

	w := &volumeWatcher{
		updateCh:         make(chan *structs.CSIVolume, 1),
		v:                vol,
		state:            parent.state,
		rpc:              parent.rpc,
		leaderAcl:        parent.leaderAcl,
		logger:           parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
		shutdownCtx:      parent.ctx,
		deleteFn:         func() { parent.remove(vol.ID + vol.Namespace) },
		quiescentTimeout: parent.quiescentTimeout,
	}

	// Start the long-lived watcher that scans for allocation updates
	w.Start()
	return w
}

// Notify signals an update to the tracked volume.
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
	if !vw.isRunning() {
		vw.Start()
	}
	select {
	case vw.updateCh <- v:
	case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
	}
}

// Start marks the watcher as running and starts the long-lived watch
// goroutine.
func (vw *volumeWatcher) Start() {
	vw.logger.Trace("starting watcher")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = true
	go vw.watch()
}

// Stop flags the watcher as no longer running; the watch goroutine defers
// this when it exits.
func (vw *volumeWatcher) Stop() {
	vw.logger.Trace("no more claims")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = false
}

// isRunning reports whether the watch goroutine is running and the server
// is not shutting down.
func (vw *volumeWatcher) isRunning() bool {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()
	select {
	case <-vw.shutdownCtx.Done():
		return false
	default:
		return vw.running
	}
}

// watch is the long-running function that watches for changes to a volume.
// Each pass steps the volume's claims through the various states of reaping
// until the volume has no more claims eligible to be reaped.
func (vw *volumeWatcher) watch() {
	defer vw.deleteFn()
	defer vw.Stop()

	timer, stop := helper.NewSafeTimer(vw.quiescentTimeout)
	defer stop()

	for {
		select {
		// TODO(tgross): currently server->client RPCs have no cancellation
		// context, so we can't stop the long-running RPCs gracefully
		case <-vw.shutdownCtx.Done():
			return
		case vol := <-vw.updateCh:
			vol = vw.getVolume(vol)
			if vol == nil {
				return
			}
			vw.volumeReap(vol)
			timer.Reset(vw.quiescentTimeout)
		case <-timer.C:
			// Wait until the volume has "settled" before stopping this
			// goroutine so that we can handle the burst of updates around
			// freeing claims without having to spin it back up
			return
		}
	}
}

// getVolume returns the tracked volume, fully populated with the current
// state
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()

	var err error
	ws := memdb.NewWatchSet()

	vol, err = vw.state.CSIVolumeByID(ws, vol.Namespace, vol.ID)
	if err != nil {
		vw.logger.Error("could not query for volume", "error", err)
		return nil
	}
	if vol == nil {
		return nil
	}

	vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
	if err != nil {
		vw.logger.Error("could not query allocs for volume", "error", err)
		return nil
	}
	vw.v = vol
	return vol
}

// volumeReap collects errors for logging but doesn't return them
// to the main loop.
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
	vw.logger.Trace("releasing unused volume claims")
	err := vw.volumeReapImpl(vol)
	if err != nil {
		vw.logger.Error("error releasing volume claims", "error", err)
	}
}

// isUnclaimed reports whether the volume has no read, write, or past claims.
func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
	return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
}

// volumeReapImpl unpublishes all the volume's PastClaims. PastClaims
// will be populated from nil or terminal allocs when we call
// CSIVolumeDenormalize(), so this assumes we've done so in the caller
func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {
	var result *multierror.Error
	for _, claim := range vol.PastClaims {
		err := vw.unpublish(vol, claim)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}
	return result.ErrorOrNil()
}

// collectPastClaims records claims for missing or terminated allocations in
// the volume's PastClaims map so that they can be released.
func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume {

	collect := func(allocs map[string]*structs.Allocation,
		claims map[string]*structs.CSIVolumeClaim) {

		for allocID, alloc := range allocs {
			if alloc == nil {
				_, exists := vol.PastClaims[allocID]
				if !exists {
					vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
						AllocationID: allocID,
						State:        structs.CSIVolumeClaimStateReadyToFree,
					}
				}
			} else if alloc.Terminated() {
				// don't overwrite the PastClaim if we've seen it before,
				// so that we can track state between subsequent calls
				_, exists := vol.PastClaims[allocID]
				if !exists {
					claim, ok := claims[allocID]
					if !ok {
						claim = &structs.CSIVolumeClaim{
							AllocationID: allocID,
							NodeID:       alloc.NodeID,
						}
					}
					claim.State = structs.CSIVolumeClaimStateTaken
					vol.PastClaims[allocID] = claim
				}
			}
		}
	}

	collect(vol.ReadAllocs, vol.ReadClaims)
	collect(vol.WriteAllocs, vol.WriteClaims)
	return vol
}

// unpublish sends a CSI Unpublish RPC for the claim and, on success, marks
// the claim as ready to free.
func (vw *volumeWatcher) unpublish(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
	vw.logger.Trace("unpublishing volume", "alloc", claim.AllocationID)
	req := &structs.CSIVolumeUnpublishRequest{
		VolumeID: vol.ID,
		Claim:    claim,
		WriteRequest: structs.WriteRequest{
			Namespace: vol.Namespace,
			Region:    vw.state.Config().Region,
			AuthToken: vw.leaderAcl,
		},
	}
	err := vw.rpc.Unpublish(req, &structs.CSIVolumeUnpublishResponse{})
	if err != nil {
		return err
	}
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	return nil
}
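
// Illustrative sketch: assuming CSIVolumeRPC is the single-method interface
// implied by the Unpublish call above, a fake like the one below can stand in
// for the server RPC layer in tests and record which claims the watcher tried
// to release. Exercising volumeReapImpl end-to-end would still need a real
// *state.StateStore, since unpublish reads the region from vw.state.Config().
// The name fakeVolumeRPC is hypothetical.
//
//	type fakeVolumeRPC struct {
//		released []string // allocation IDs passed to Unpublish
//	}
//
//	func (f *fakeVolumeRPC) Unpublish(req *structs.CSIVolumeUnpublishRequest,
//		resp *structs.CSIVolumeUnpublishResponse) error {
//		f.released = append(f.released, req.Claim.AllocationID)
//		return nil
//	}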