github.com/aminovpavel/nomad@v0.11.8/nomad/volumewatcher/volumes_watcher.go

package volumewatcher

import (
	"context"
	"errors"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"golang.org/x/time/rate"
)

const (
	// LimitStateQueriesPerSecond is the number of state queries allowed per
	// second
	LimitStateQueriesPerSecond = 100.0

	// CrossVolumeUpdateBatchDuration is the duration in which volume
	// claim updates are batched across all volume watchers before
	// being committed to Raft.
	CrossVolumeUpdateBatchDuration = 250 * time.Millisecond
)

// Watcher is used to watch volumes and their allocations created
// by the scheduler and trigger the scheduler when allocation health
// transitions.
type Watcher struct {
	enabled bool
	logger  log.Logger

	// queryLimiter is used to limit the rate of blocking queries
	queryLimiter *rate.Limiter

	// updateBatchDuration is the duration in which volume
	// claim updates are batched across all volume watchers
	// before being committed to Raft.
	updateBatchDuration time.Duration

	// raft contains the set of Raft endpoints that can be used by the
	// volumes watcher
	raft VolumeRaftEndpoints

	// rpc contains the set of Server methods that can be used by
	// the volumes watcher for RPC
	rpc ClientRPC

	// state is the state that is watched for state changes.
	state *state.StateStore

	// watchers is the set of active watchers, one per volume
	watchers map[string]*volumeWatcher

	// volumeUpdateBatcher is used to batch volume claim updates
	volumeUpdateBatcher *VolumeUpdateBatcher

	// ctx and exitFn are used to cancel the watcher
	ctx    context.Context
	exitFn context.CancelFunc

	wlock sync.RWMutex
}

// NewVolumesWatcher returns a volumes watcher that is used to watch
// volumes and trigger the scheduler as needed.
func NewVolumesWatcher(logger log.Logger,
	raft VolumeRaftEndpoints, rpc ClientRPC, stateQueriesPerSecond float64,
	updateBatchDuration time.Duration) *Watcher {

	// the leader step-down calls SetEnabled(false) which is what
	// cancels this context, rather than passing in its own shutdown
	// context
	ctx, exitFn := context.WithCancel(context.Background())

	return &Watcher{
		raft:                raft,
		rpc:                 rpc,
		queryLimiter:        rate.NewLimiter(rate.Limit(stateQueriesPerSecond), 100),
		updateBatchDuration: updateBatchDuration,
		logger:              logger.Named("volumes_watcher"),
		ctx:                 ctx,
		exitFn:              exitFn,
	}
}

// SetEnabled is used to control if the watcher is enabled. The
// watcher should only be enabled on the active leader. When being
// enabled the state is passed in as it is no longer valid once a
// leader election has taken place.
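//
// A rough usage sketch (editor's illustration, not part of the original
// file): raftShim, clientRPC, and stateStore are placeholders for the
// concrete VolumeRaftEndpoints, ClientRPC, and *state.StateStore values
// held by the leader.
//
//	w := NewVolumesWatcher(logger, raftShim, clientRPC,
//		LimitStateQueriesPerSecond, CrossVolumeUpdateBatchDuration)
//	w.SetEnabled(true, stateStore) // on gaining leadership
//	w.SetEnabled(false, nil)       // on losing leadership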
func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore) {
	w.wlock.Lock()
	defer w.wlock.Unlock()

	wasEnabled := w.enabled
	w.enabled = enabled

	if state != nil {
		w.state = state
	}

	// Flush the state to create the necessary objects
	w.flush(enabled)

	// If we are starting now, launch the watch daemon
	if enabled && !wasEnabled {
		go w.watchVolumes(w.ctx)
	}
}

// flush is used to clear the state of the watcher
func (w *Watcher) flush(enabled bool) {
	// Stop all the watchers and clear it
	for _, watcher := range w.watchers {
		watcher.Stop()
	}

	// Kill everything associated with the watcher
	if w.exitFn != nil {
		w.exitFn()
	}

	w.watchers = make(map[string]*volumeWatcher, 32)
	w.ctx, w.exitFn = context.WithCancel(context.Background())

	if enabled {
		w.volumeUpdateBatcher = NewVolumeUpdateBatcher(w.ctx, w.updateBatchDuration, w.raft)
	} else {
		w.volumeUpdateBatcher = nil
	}
}

// watchVolumes is the long-lived goroutine that watches for volumes to
// add and remove watchers on.
func (w *Watcher) watchVolumes(ctx context.Context) {
	vIndex := uint64(1)
	for {
		volumes, idx, err := w.getVolumes(ctx, vIndex)
		if err != nil {
			if err == context.Canceled {
				return
			}
			w.logger.Error("failed to retrieve volumes", "error", err)
		}

		vIndex = idx // last-seen index
		for _, v := range volumes {
			if err := w.add(v); err != nil {
				w.logger.Error("failed to track volume", "volume_id", v.ID, "error", err)
			}
		}
	}
}

// getVolumes retrieves all volumes blocking at the given index.
func (w *Watcher) getVolumes(ctx context.Context, minIndex uint64) ([]*structs.CSIVolume, uint64, error) {
	resp, index, err := w.state.BlockingQuery(w.getVolumesImpl, minIndex, ctx)
	if err != nil {
		return nil, 0, err
	}

	return resp.([]*structs.CSIVolume), index, nil
}

// getVolumesImpl retrieves all volumes from the passed state store.
func (w *Watcher) getVolumesImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {

	iter, err := state.CSIVolumes(ws)
	if err != nil {
		return nil, 0, err
	}

	var volumes []*structs.CSIVolume
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		volume := raw.(*structs.CSIVolume)
		volumes = append(volumes, volume)
	}

	// Use the last index that affected the volume table
	index, err := state.Index("csi_volumes")
	if err != nil {
		return nil, 0, err
	}

	return volumes, index, nil
}

// add adds a volume to the watch list
func (w *Watcher) add(d *structs.CSIVolume) error {
	w.wlock.Lock()
	defer w.wlock.Unlock()
	_, err := w.addLocked(d)
	return err
}

// addLocked adds a volume to the watch list and should only be called when
// locked. Creating the volumeWatcher starts a goroutine to .watch() it.
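//
// If the volume is already being watched, the existing watcher is simply
// notified of the update; otherwise a new volumeWatcher is created and
// tracked under the composite key v.ID+v.Namespace.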
func (w *Watcher) addLocked(v *structs.CSIVolume) (*volumeWatcher, error) {
	// Not enabled so no-op
	if !w.enabled {
		return nil, nil
	}

	// Already watched so trigger an update for the volume
	if watcher, ok := w.watchers[v.ID+v.Namespace]; ok {
		watcher.Notify(v)
		return nil, nil
	}

	watcher := newVolumeWatcher(w, v)
	w.watchers[v.ID+v.Namespace] = watcher
	return watcher, nil
}

// TODO: this is currently dead code; we'll call a public remove
// method on the Watcher once we have a periodic GC job
// remove stops watching a volume and should only be called when locked.
func (w *Watcher) removeLocked(volID, namespace string) {
	if !w.enabled {
		return
	}
	if watcher, ok := w.watchers[volID+namespace]; ok {
		watcher.Stop()
		delete(w.watchers, volID+namespace)
	}
}

// updateClaims sends the claims to the batch updater and waits for
// the results
func (w *Watcher) updateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
	b := w.volumeUpdateBatcher
	if b == nil {
		return 0, errors.New("volume watcher is not enabled")
	}
	return b.CreateUpdate(claims).Results()
}
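
// Editor's note (illustrative sketch, not part of the original file): a
// volumeWatcher that wants to release a claim would build a
// structs.CSIVolumeClaimRequest and pass it to updateClaims, which blocks
// until the batched Raft apply for that window returns. The field names
// below are assumptions about this version's structs package, not verified
// against it:
//
//	req := structs.CSIVolumeClaimRequest{
//		VolumeID:     vol.ID,
//		AllocationID: alloc.ID,
//		Claim:        structs.CSIVolumeClaimRelease,
//	}
//	index, err := w.updateClaims([]structs.CSIVolumeClaimRequest{req})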