github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/volumewatcher/volume_watcher.go

package volumewatcher

import (
	"context"
	"sync"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

// volumeWatcher is used to watch a single volume and trigger the
// scheduler when allocation health transitions.
type volumeWatcher struct {
	// v is the volume being watched
	v *structs.CSIVolume

	// state is the state store that is watched for changes
	state *state.StateStore

	// rpc is the server interface for CSI client RPCs
	rpc CSIVolumeRPC

	// leaderAcl is the ACL token needed to send RPCs
	leaderAcl string

	logger      log.Logger
	shutdownCtx context.Context // parent context
	ctx         context.Context // own context
	exitFn      context.CancelFunc

	// updateCh is triggered when there is an updated volume
	updateCh chan *structs.CSIVolume

	wLock   sync.RWMutex
	running bool
}

// newVolumeWatcher returns a volume watcher that is used to watch a
// single volume.
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {

	w := &volumeWatcher{
		updateCh:    make(chan *structs.CSIVolume, 1),
		v:           vol,
		state:       parent.state,
		rpc:         parent.rpc,
		leaderAcl:   parent.leaderAcl,
		logger:      parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
		shutdownCtx: parent.ctx,
	}

	// Start the long-lived watcher that scans for allocation updates
	w.Start()
	return w
}

// Notify signals an update to the tracked volume.
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
	if !vw.isRunning() {
		vw.Start()
	}
	select {
	case vw.updateCh <- v:
	case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
	case <-vw.ctx.Done(): // prevent deadlock if we stopped
	}
}

// Start runs the watch loop in a new goroutine.
func (vw *volumeWatcher) Start() {
	vw.logger.Trace("starting watcher")
	vw.wLock.Lock()
	defer vw.wLock.Unlock()
	vw.running = true
	ctx, exitFn := context.WithCancel(vw.shutdownCtx)
	vw.ctx = ctx
	vw.exitFn = exitFn
	go vw.watch()
}

// Stop stops watching the volume. This should be called whenever a
// volume's claims are fully reaped or the watcher is no longer needed.
func (vw *volumeWatcher) Stop() {
	vw.logger.Trace("no more claims")
	vw.exitFn()
}

// isRunning reports whether the watch loop is active and neither the
// watcher's own context nor the server's shutdown context is done.
func (vw *volumeWatcher) isRunning() bool {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()
	select {
	case <-vw.shutdownCtx.Done():
		return false
	case <-vw.ctx.Done():
		return false
	default:
		return vw.running
	}
}

// watch is the long-running function that watches for changes to a volume.
// Each pass steps the volume's claims through the various states of reaping
// until the volume has no more claims eligible to be reaped.
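// A pass with no pending update on updateCh falls through to the select's
// default case, which stops the watcher; Notify restarts it when the next
// update for this volume arrives.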
func (vw *volumeWatcher) watch() {
	for {
		select {
		// TODO(tgross): currently server->client RPCs have no cancellation
		// context, so we can't stop the long-running RPCs gracefully
		case <-vw.shutdownCtx.Done():
			return
		case <-vw.ctx.Done():
			return
		case vol := <-vw.updateCh:
			// while we won't make raft writes if we get a stale update,
			// we can still fire extra CSI RPC calls if we don't check this
			if vol.ModifyIndex >= vw.v.ModifyIndex {
				vol = vw.getVolume(vol)
				if vol == nil {
					return
				}
				vw.volumeReap(vol)
			}
		default:
			vw.Stop() // no pending work
			return
		}
	}
}

// getVolume returns the tracked volume, fully populated with the current
// state.
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
	vw.wLock.RLock()
	defer vw.wLock.RUnlock()

	var err error
	ws := memdb.NewWatchSet()

	vol, err = vw.state.CSIVolumeDenormalizePlugins(ws, vol.Copy())
	if err != nil {
		vw.logger.Error("could not query plugins for volume", "error", err)
		return nil
	}

	vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
	if err != nil {
		vw.logger.Error("could not query allocs for volume", "error", err)
		return nil
	}
	vw.v = vol
	return vol
}

// volumeReap collects errors for logging but doesn't return them
// to the main loop.
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
	vw.logger.Trace("releasing unused volume claims")
	err := vw.volumeReapImpl(vol)
	if err != nil {
		vw.logger.Error("error releasing volume claims", "error", err)
	}
	if vw.isUnclaimed(vol) {
		vw.Stop()
	}
}

// isUnclaimed reports whether the volume has no read, write, or past
// claims left to reap.
func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
	return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
}

func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {

	// PastClaims written by a volume GC core job will have no allocation,
	// so we need to find out which allocs are eligible for cleanup.
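	// collectPastClaims backfills those claims by scanning the volume's
	// read and write allocations for missing or terminal allocs.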
	for _, claim := range vol.PastClaims {
		if claim.AllocationID == "" {
			vol = vw.collectPastClaims(vol)
			break // only need to collect once
		}
	}

	var result *multierror.Error
	for _, claim := range vol.PastClaims {
		err := vw.unpublish(vol, claim)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}

	return result.ErrorOrNil()
}

// collectPastClaims adds a PastClaim for each of the volume's read and
// write allocations that is missing from state or has terminated.
func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume {

	collect := func(allocs map[string]*structs.Allocation,
		claims map[string]*structs.CSIVolumeClaim) {

		for allocID, alloc := range allocs {
			if alloc == nil {
				_, exists := vol.PastClaims[allocID]
				if !exists {
					vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
						AllocationID: allocID,
						State:        structs.CSIVolumeClaimStateReadyToFree,
					}
				}
			} else if alloc.Terminated() {
				// don't overwrite the PastClaim if we've seen it before,
				// so that we can track state between subsequent calls
				_, exists := vol.PastClaims[allocID]
				if !exists {
					claim, ok := claims[allocID]
					if !ok {
						claim = &structs.CSIVolumeClaim{
							AllocationID: allocID,
							NodeID:       alloc.NodeID,
						}
					}
					claim.State = structs.CSIVolumeClaimStateTaken
					vol.PastClaims[allocID] = claim
				}
			}
		}
	}

	collect(vol.ReadAllocs, vol.ReadClaims)
	collect(vol.WriteAllocs, vol.WriteClaims)
	return vol
}

// unpublish sends an Unpublish RPC for the claim and, on success, marks
// the claim as ready to free.
func (vw *volumeWatcher) unpublish(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
	req := &structs.CSIVolumeUnpublishRequest{
		VolumeID: vol.ID,
		Claim:    claim,
		WriteRequest: structs.WriteRequest{
			Namespace: vol.Namespace,
			Region:    vw.state.Config().Region,
			AuthToken: vw.leaderAcl,
		},
	}
	err := vw.rpc.Unpublish(req, &structs.CSIVolumeUnpublishResponse{})
	if err != nil {
		return err
	}
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	return nil
}
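
For context, here is a minimal sketch of how the parent Watcher might route an updated volume to its per-volume watcher. The getOrCreateWatcher helper and the watchers/wlock fields are assumptions for illustration; they are not part of this file.

	// Hypothetical sketch, not part of this file: the watchers map, the
	// wlock mutex, and this helper are assumed for illustration.
	func (w *Watcher) getOrCreateWatcher(vol *structs.CSIVolume) {
		w.wlock.Lock()
		defer w.wlock.Unlock()

		// key by ID and namespace so same-ID volumes in different
		// namespaces get separate watchers
		key := vol.ID + vol.Namespace
		watcher, ok := w.watchers[key]
		if !ok {
			// newVolumeWatcher starts the watch goroutine immediately
			watcher = newVolumeWatcher(w, vol)
			w.watchers[key] = watcher
		}

		// Notify restarts the watcher if it already drained its work
		// and stopped itself, then hands it the updated volume
		watcher.Notify(vol)
	}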