github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/nomad/volumewatcher/volumes_watcher.go

package volumewatcher

import (
	"context"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"golang.org/x/time/rate"
)

const (
	// LimitStateQueriesPerSecond is the number of state queries allowed per
	// second
	LimitStateQueriesPerSecond = 100.0

	// CrossVolumeUpdateBatchDuration is the duration in which volume
	// claim updates are batched across all volume watchers before
	// being committed to Raft.
	CrossVolumeUpdateBatchDuration = 250 * time.Millisecond
)

// Watcher is used to watch CSI volumes and the claims made against
// them by allocations, and to update those claims as allocation
// health transitions.
type Watcher struct {
	enabled bool
	logger  log.Logger

	// queryLimiter is used to limit the rate of blocking queries
	queryLimiter *rate.Limiter

	// updateBatchDuration is the duration in which volume
	// claim updates are batched across all volume watchers
	// before being committed to Raft.
	updateBatchDuration time.Duration

	// raft contains the set of Raft endpoints that can be used by the
	// volumes watcher
	raft VolumeRaftEndpoints

	// rpc contains the set of Server methods that can be used by
	// the volumes watcher for RPC
	rpc ClientRPC

	// state is the state that is watched for state changes.
	state *state.StateStore

	// watchers is the set of active watchers, one per volume
	watchers map[string]*volumeWatcher

	// volumeUpdateBatcher is used to batch volume claim updates
	volumeUpdateBatcher *VolumeUpdateBatcher

	// ctx and exitFn are used to cancel the watcher
	ctx    context.Context
	exitFn context.CancelFunc

	wlock sync.RWMutex
}

// NewVolumesWatcher returns a volumes watcher that is used to watch
// CSI volumes and update their claims as needed.
func NewVolumesWatcher(logger log.Logger,
	raft VolumeRaftEndpoints, rpc ClientRPC, stateQueriesPerSecond float64,
	updateBatchDuration time.Duration) *Watcher {

	// the leader step-down calls SetEnabled(false), which is what
	// cancels this context, rather than passing in its own shutdown
	// context
	ctx, exitFn := context.WithCancel(context.Background())

	return &Watcher{
		raft:                raft,
		rpc:                 rpc,
		queryLimiter:        rate.NewLimiter(rate.Limit(stateQueriesPerSecond), 100),
		updateBatchDuration: updateBatchDuration,
		logger:              logger.Named("volumes_watcher"),
		ctx:                 ctx,
		exitFn:              exitFn,
	}
}
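
// newDefaultVolumesWatcher is an illustrative sketch, not part of the original
// file: it shows how a caller might wire the constructor to the package-level
// defaults declared above. The logger, raft, and rpc arguments are assumed to
// be supplied by whatever server component owns the watcher.
func newDefaultVolumesWatcher(logger log.Logger, raft VolumeRaftEndpoints, rpc ClientRPC) *Watcher {
	// Use the package defaults for query rate limiting and claim update
	// batching.
	return NewVolumesWatcher(logger, raft, rpc,
		LimitStateQueriesPerSecond, CrossVolumeUpdateBatchDuration)
}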

// SetEnabled is used to control if the watcher is enabled. The
// watcher should only be enabled on the active leader. When being
// enabled, the state store is passed in because the previous one is
// no longer valid once a leader election has taken place.
func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore) {
	w.wlock.Lock()
	defer w.wlock.Unlock()

	wasEnabled := w.enabled
	w.enabled = enabled

	if state != nil {
		w.state = state
	}

	// Flush the state to create the necessary objects
	w.flush()

	// If we are starting now, launch the watch daemon
	if enabled && !wasEnabled {
		go w.watchVolumes(w.ctx)
	}
}

// flush is used to clear the state of the watcher
func (w *Watcher) flush() {
	// Stop all the watchers and clear the map
	for _, watcher := range w.watchers {
		watcher.Stop()
	}

	// Kill everything associated with the watcher
	if w.exitFn != nil {
		w.exitFn()
	}

	w.watchers = make(map[string]*volumeWatcher, 32)
	w.ctx, w.exitFn = context.WithCancel(context.Background())
	w.volumeUpdateBatcher = NewVolumeUpdateBatcher(w.updateBatchDuration, w.raft, w.ctx)
}

// watchVolumes is the long-lived goroutine that watches for volumes to
// add and remove watchers on.
func (w *Watcher) watchVolumes(ctx context.Context) {
	vIndex := uint64(1)
	for {
		volumes, idx, err := w.getVolumes(ctx, vIndex)
		if err != nil {
			if err == context.Canceled {
				return
			}
			w.logger.Error("failed to retrieve volumes", "error", err)
		}

		vIndex = idx // last-seen index
		for _, v := range volumes {
			if err := w.add(v); err != nil {
				w.logger.Error("failed to track volume", "volume_id", v.ID, "error", err)
			}
		}
	}
}

// getVolumes retrieves all volumes, blocking at the given index.
func (w *Watcher) getVolumes(ctx context.Context, minIndex uint64) ([]*structs.CSIVolume, uint64, error) {
	resp, index, err := w.state.BlockingQuery(w.getVolumesImpl, minIndex, ctx)
	if err != nil {
		return nil, 0, err
	}

	return resp.([]*structs.CSIVolume), index, nil
}

// getVolumesImpl retrieves all volumes from the passed state store.
func (w *Watcher) getVolumesImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
	iter, err := state.CSIVolumes(ws)
	if err != nil {
		return nil, 0, err
	}

	var volumes []*structs.CSIVolume
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		volume := raw.(*structs.CSIVolume)
		volumes = append(volumes, volume)
	}

	// Use the last index that affected the volume table
	index, err := state.Index("csi_volumes")
	if err != nil {
		return nil, 0, err
	}

	return volumes, index, nil
}
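
// getNamespaceVolumesImpl is an illustrative sketch, not part of the original
// file: it shows how the same blocking-query shape used by getVolumesImpl
// could be narrowed to a single namespace while still returning the
// csi_volumes table index that BlockingQuery uses to unblock callers. The
// namespace filter is hypothetical; the watcher does not need it today.
func (w *Watcher) getNamespaceVolumesImpl(namespace string) func(memdb.WatchSet, *state.StateStore) (interface{}, uint64, error) {
	return func(ws memdb.WatchSet, store *state.StateStore) (interface{}, uint64, error) {
		iter, err := store.CSIVolumes(ws)
		if err != nil {
			return nil, 0, err
		}

		// Collect only the volumes that belong to the requested namespace.
		var volumes []*structs.CSIVolume
		for raw := iter.Next(); raw != nil; raw = iter.Next() {
			volume := raw.(*structs.CSIVolume)
			if volume.Namespace == namespace {
				volumes = append(volumes, volume)
			}
		}

		// Use the last index that affected the volume table so the blocking
		// query wakes on any volume change.
		index, err := store.Index("csi_volumes")
		if err != nil {
			return nil, 0, err
		}

		return volumes, index, nil
	}
}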

// add adds a volume to the watch list
func (w *Watcher) add(v *structs.CSIVolume) error {
	w.wlock.Lock()
	defer w.wlock.Unlock()
	_, err := w.addLocked(v)
	return err
}

// addLocked adds a volume to the watch list and should only be called when
// the lock is held. Creating the volumeWatcher starts a goroutine to .watch() it.
func (w *Watcher) addLocked(v *structs.CSIVolume) (*volumeWatcher, error) {
	// Not enabled so no-op
	if !w.enabled {
		return nil, nil
	}

	// Already watched so trigger an update for the volume
	if watcher, ok := w.watchers[v.ID+v.Namespace]; ok {
		watcher.Notify(v)
		return nil, nil
	}

	watcher := newVolumeWatcher(w, v)
	w.watchers[v.ID+v.Namespace] = watcher
	return watcher, nil
}

// removeLocked stops watching a volume and should only be called when the
// lock is held.
// TODO: this is currently dead code; we'll call a public remove
// method on the Watcher once we have a periodic GC job.
func (w *Watcher) removeLocked(volID, namespace string) {
	if !w.enabled {
		return
	}
	if watcher, ok := w.watchers[volID+namespace]; ok {
		watcher.Stop()
		delete(w.watchers, volID+namespace)
	}
}

// updateClaims sends the claims to the batch updater and waits for
// the results
func (w *Watcher) updateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
	return w.volumeUpdateBatcher.CreateUpdate(claims).Results()
}
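
// setLeadership is an illustrative sketch, not part of the original file: it
// shows the lifecycle described in NewVolumesWatcher, where gaining
// leadership enables the watcher against a fresh state store and stepping
// down disables it, which cancels the watcher context via flush. The caller
// and the point at which this runs are assumptions.
func setLeadership(w *Watcher, isLeader bool, store *state.StateStore) {
	if isLeader {
		// Pass in the current state store; the previous one is no longer
		// valid after a leader election.
		w.SetEnabled(true, store)
		return
	}
	// Disabling stops all per-volume watchers and cancels the context.
	w.SetEnabled(false, nil)
}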