github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/volumewatcher/volume_watcher.go

package volumewatcher

import (
	"context"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

// volumeWatcher is used to watch a single volume and trigger the
// scheduler when allocation health transitions.
type volumeWatcher struct {
	// v is the volume being watched
	v *structs.CSIVolume

	// state is the state that is watched for state changes.
	state *state.StateStore

	// server interface for CSI client RPCs
	rpc CSIVolumeRPC

	// the ACL needed to send RPCs
	leaderAcl string

	logger      log.Logger
	shutdownCtx context.Context // parent context
	ctx         context.Context // own context
	exitFn      context.CancelFunc
	deleteFn    func()

	// quiescentTimeout is the time we wait until the volume has "settled"
	// before stopping the child watcher goroutines
	quiescentTimeout time.Duration

	// updateCh is triggered when there is an updated volume
	updateCh chan *structs.CSIVolume

	wLock   sync.RWMutex
	running bool
}

// newVolumeWatcher returns a volume watcher that is used to watch
// volumes
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {

	w := &volumeWatcher{
		updateCh:         make(chan *structs.CSIVolume, 1),
		v:                vol,
		state:            parent.state,
		rpc:              parent.rpc,
		leaderAcl:        parent.leaderAcl,
		logger:           parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
		shutdownCtx:      parent.ctx,
		deleteFn:         func() { parent.remove(vol.ID + vol.Namespace) },
		quiescentTimeout: parent.quiescentTimeout,
	}

	// Start the long-lived watcher that scans for allocation updates
	w.Start()
	return w
}

// Notify signals an update to the tracked volume.
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
	if !vw.isRunning() {
		vw.Start()
	}
	select {
	case vw.updateCh <- v:
	case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
	}
}

func (vw *volumeWatcher) Start() {
	vw.logger.Trace("starting watcher")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = true
	go vw.watch()
}

func (vw *volumeWatcher) Stop() {
	vw.logger.Trace("no more claims")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = false
}

func (vw *volumeWatcher) isRunning() bool {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()
	select {
	case <-vw.shutdownCtx.Done():
		return false
	default:
		return vw.running
	}
}
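
// The sketch below is not part of the original file. It illustrates how a
// caller holding a set of per-volume watchers might fan out a batch of
// updated volumes: Notify restarts the watch goroutine if the quiescent
// timeout has already stopped it, and the buffered updateCh plus the
// shutdownCtx case keep the send from blocking during shutdown. The
// notifyAllSketch name and the watchers map are assumptions for
// illustration; only Notify and the ID+Namespace key used by deleteFn
// above come from this file.
func notifyAllSketch(watchers map[string]*volumeWatcher, vols []*structs.CSIVolume) {
	for _, vol := range vols {
		// the parent indexes watchers by volume ID + namespace (see deleteFn)
		if w, ok := watchers[vol.ID+vol.Namespace]; ok {
			w.Notify(vol)
		}
	}
}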

// watch is the long-running function that watches for changes to a volume.
// Each pass steps the volume's claims through the various states of reaping
// until the volume has no more claims eligible to be reaped.
func (vw *volumeWatcher) watch() {
	defer vw.deleteFn()
	defer vw.Stop()

	timer, stop := helper.NewSafeTimer(vw.quiescentTimeout)
	defer stop()

	for {
		select {
		// TODO(tgross): currently server->client RPCs have no cancellation
		// context, so we can't stop the long-running RPCs gracefully
		case <-vw.shutdownCtx.Done():
			return
		case vol := <-vw.updateCh:
			vol = vw.getVolume(vol)
			if vol == nil {
				return
			}
			vw.volumeReap(vol)
			timer.Reset(vw.quiescentTimeout)
		case <-timer.C:
			// Wait until the volume has "settled" before stopping this
			// goroutine so that we can handle the burst of updates around
			// freeing claims without having to spin it back up
			return
		}
	}
}

// getVolume returns the tracked volume, fully populated with the current
// state
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()

	var err error
	ws := memdb.NewWatchSet()

	vol, err = vw.state.CSIVolumeByID(ws, vol.Namespace, vol.ID)
	if err != nil {
		vw.logger.Error("could not query for volume", "error", err)
		return nil
	}
	if vol == nil {
		return nil
	}

	vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
	if err != nil {
		vw.logger.Error("could not query allocs for volume", "error", err)
		return nil
	}
	vw.v = vol
	return vol
}

// volumeReap collects errors for logging but doesn't return them
// to the main loop.
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
	vw.logger.Trace("releasing unused volume claims")
	err := vw.volumeReapImpl(vol)
	if err != nil {
		vw.logger.Error("error releasing volume claims", "error", err)
	}
}

func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
	return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
}
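
// The sketch below is not part of the original file. It shows how a single
// reap pass could be driven synchronously, for example from a test: refresh
// the volume from the state store, release any PastClaims, and report
// whether the volume has settled. The reapOnceSketch name is an assumption;
// the body only composes getVolume, volumeReap, and isUnclaimed as defined
// in this file.
func reapOnceSketch(vw *volumeWatcher, vol *structs.CSIVolume) bool {
	vol = vw.getVolume(vol)
	if vol == nil {
		// the volume has been deleted from state; nothing left to reap
		return true
	}
	vw.volumeReap(vol)
	return vw.isUnclaimed(vol)
}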

// volumeReapImpl unpublishes all the volume's PastClaims. PastClaims
// will be populated from nil or terminal allocs when we call
// CSIVolumeDenormalize(), so this assumes we've done so in the caller
func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {
	var result *multierror.Error
	for _, claim := range vol.PastClaims {
		err := vw.unpublish(vol, claim)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}
	return result.ErrorOrNil()
}

func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume {

	collect := func(allocs map[string]*structs.Allocation,
		claims map[string]*structs.CSIVolumeClaim) {

		for allocID, alloc := range allocs {
			if alloc == nil {
				_, exists := vol.PastClaims[allocID]
				if !exists {
					vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
						AllocationID: allocID,
						State:        structs.CSIVolumeClaimStateReadyToFree,
					}
				}
			} else if alloc.Terminated() {
				// don't overwrite the PastClaim if we've seen it before,
				// so that we can track state between subsequent calls
				_, exists := vol.PastClaims[allocID]
				if !exists {
					claim, ok := claims[allocID]
					if !ok {
						claim = &structs.CSIVolumeClaim{
							AllocationID: allocID,
							NodeID:       alloc.NodeID,
						}
					}
					claim.State = structs.CSIVolumeClaimStateTaken
					vol.PastClaims[allocID] = claim
				}
			}
		}
	}

	collect(vol.ReadAllocs, vol.ReadClaims)
	collect(vol.WriteAllocs, vol.WriteClaims)
	return vol
}

func (vw *volumeWatcher) unpublish(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
	vw.logger.Trace("unpublishing volume", "alloc", claim.AllocationID)
	req := &structs.CSIVolumeUnpublishRequest{
		VolumeID: vol.ID,
		Claim:    claim,
		WriteRequest: structs.WriteRequest{
			Namespace: vol.Namespace,
			Region:    vw.state.Config().Region,
			AuthToken: vw.leaderAcl,
		},
	}
	err := vw.rpc.Unpublish(req, &structs.CSIVolumeUnpublishResponse{})
	if err != nil {
		return err
	}
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	return nil
}
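
// The sketch below is not part of the original file. It illustrates how
// collectPastClaims classifies allocations: an allocation that has been
// garbage-collected (nil) yields a PastClaim that is immediately ready to
// free, while a terminal allocation yields a PastClaim in the "taken" state
// so that unpublish can release it. The function name, the alloc IDs, and
// the use of AllocClientStatusComplete to stand in for a terminal
// allocation are assumptions for illustration.
func collectPastClaimsSketch(vw *volumeWatcher) {
	vol := &structs.CSIVolume{
		ReadAllocs: map[string]*structs.Allocation{
			// alloc no longer in state: claim can be freed right away
			"gc-ed-alloc": nil,
		},
		WriteAllocs: map[string]*structs.Allocation{
			// alloc still in state but terminal: claim must be unpublished
			"stopped-alloc": &structs.Allocation{ClientStatus: structs.AllocClientStatusComplete},
		},
		ReadClaims:  map[string]*structs.CSIVolumeClaim{},
		WriteClaims: map[string]*structs.CSIVolumeClaim{},
		PastClaims:  map[string]*structs.CSIVolumeClaim{},
	}

	vol = vw.collectPastClaims(vol)

	// Expected states after the call:
	//   vol.PastClaims["gc-ed-alloc"].State   == structs.CSIVolumeClaimStateReadyToFree
	//   vol.PastClaims["stopped-alloc"].State == structs.CSIVolumeClaimStateTaken
	_ = vol
}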