go.etcd.io/etcd@v3.3.27+incompatible/mvcc/watcher_group.go (about) 1 // Copyright 2016 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mvcc 16 17 import ( 18 "fmt" 19 "math" 20 21 "github.com/coreos/etcd/mvcc/mvccpb" 22 "github.com/coreos/etcd/pkg/adt" 23 ) 24 25 var ( 26 // watchBatchMaxRevs is the maximum distinct revisions that 27 // may be sent to an unsynced watcher at a time. Declared as 28 // var instead of const for testing purposes. 29 watchBatchMaxRevs = 1000 30 ) 31 32 type eventBatch struct { 33 // evs is a batch of revision-ordered events 34 evs []mvccpb.Event 35 // revs is the minimum unique revisions observed for this batch 36 revs int 37 // moreRev is first revision with more events following this batch 38 moreRev int64 39 } 40 41 func (eb *eventBatch) add(ev mvccpb.Event) { 42 if eb.revs > watchBatchMaxRevs { 43 // maxed out batch size 44 return 45 } 46 47 if len(eb.evs) == 0 { 48 // base case 49 eb.revs = 1 50 eb.evs = append(eb.evs, ev) 51 return 52 } 53 54 // revision accounting 55 ebRev := eb.evs[len(eb.evs)-1].Kv.ModRevision 56 evRev := ev.Kv.ModRevision 57 if evRev > ebRev { 58 eb.revs++ 59 if eb.revs > watchBatchMaxRevs { 60 eb.moreRev = evRev 61 return 62 } 63 } 64 65 eb.evs = append(eb.evs, ev) 66 } 67 68 type watcherBatch map[*watcher]*eventBatch 69 70 func (wb watcherBatch) add(w *watcher, ev mvccpb.Event) { 71 eb := wb[w] 72 if eb == nil { 73 eb = &eventBatch{} 74 wb[w] = eb 75 } 76 eb.add(ev) 77 } 78 79 // newWatcherBatch maps watchers to their matched events. It enables quick 80 // events look up by watcher. 81 func newWatcherBatch(wg *watcherGroup, evs []mvccpb.Event) watcherBatch { 82 if len(wg.watchers) == 0 { 83 return nil 84 } 85 86 wb := make(watcherBatch) 87 for _, ev := range evs { 88 for w := range wg.watcherSetByKey(string(ev.Kv.Key)) { 89 if ev.Kv.ModRevision >= w.minRev { 90 // don't double notify 91 wb.add(w, ev) 92 } 93 } 94 } 95 return wb 96 } 97 98 type watcherSet map[*watcher]struct{} 99 100 func (w watcherSet) add(wa *watcher) { 101 if _, ok := w[wa]; ok { 102 panic("add watcher twice!") 103 } 104 w[wa] = struct{}{} 105 } 106 107 func (w watcherSet) union(ws watcherSet) { 108 for wa := range ws { 109 w.add(wa) 110 } 111 } 112 113 func (w watcherSet) delete(wa *watcher) { 114 if _, ok := w[wa]; !ok { 115 panic("removing missing watcher!") 116 } 117 delete(w, wa) 118 } 119 120 type watcherSetByKey map[string]watcherSet 121 122 func (w watcherSetByKey) add(wa *watcher) { 123 set := w[string(wa.key)] 124 if set == nil { 125 set = make(watcherSet) 126 w[string(wa.key)] = set 127 } 128 set.add(wa) 129 } 130 131 func (w watcherSetByKey) delete(wa *watcher) bool { 132 k := string(wa.key) 133 if v, ok := w[k]; ok { 134 if _, ok := v[wa]; ok { 135 delete(v, wa) 136 if len(v) == 0 { 137 // remove the set; nothing left 138 delete(w, k) 139 } 140 return true 141 } 142 } 143 return false 144 } 145 146 // watcherGroup is a collection of watchers organized by their ranges 147 type watcherGroup struct { 148 // keyWatchers has the watchers that watch on a single key 149 keyWatchers watcherSetByKey 150 // ranges has the watchers that watch a range; it is sorted by interval 151 ranges adt.IntervalTree 152 // watchers is the set of all watchers 153 watchers watcherSet 154 } 155 156 func newWatcherGroup() watcherGroup { 157 return watcherGroup{ 158 keyWatchers: make(watcherSetByKey), 159 ranges: adt.NewIntervalTree(), 160 watchers: make(watcherSet), 161 } 162 } 163 164 // add puts a watcher in the group. 165 func (wg *watcherGroup) add(wa *watcher) { 166 wg.watchers.add(wa) 167 if wa.end == nil { 168 wg.keyWatchers.add(wa) 169 return 170 } 171 172 // interval already registered? 173 ivl := adt.NewStringAffineInterval(string(wa.key), string(wa.end)) 174 if iv := wg.ranges.Find(ivl); iv != nil { 175 iv.Val.(watcherSet).add(wa) 176 return 177 } 178 179 // not registered, put in interval tree 180 ws := make(watcherSet) 181 ws.add(wa) 182 wg.ranges.Insert(ivl, ws) 183 } 184 185 // contains is whether the given key has a watcher in the group. 186 func (wg *watcherGroup) contains(key string) bool { 187 _, ok := wg.keyWatchers[key] 188 return ok || wg.ranges.Intersects(adt.NewStringAffinePoint(key)) 189 } 190 191 // size gives the number of unique watchers in the group. 192 func (wg *watcherGroup) size() int { return len(wg.watchers) } 193 194 // delete removes a watcher from the group. 195 func (wg *watcherGroup) delete(wa *watcher) bool { 196 if _, ok := wg.watchers[wa]; !ok { 197 return false 198 } 199 wg.watchers.delete(wa) 200 if wa.end == nil { 201 wg.keyWatchers.delete(wa) 202 return true 203 } 204 205 ivl := adt.NewStringAffineInterval(string(wa.key), string(wa.end)) 206 iv := wg.ranges.Find(ivl) 207 if iv == nil { 208 return false 209 } 210 211 ws := iv.Val.(watcherSet) 212 delete(ws, wa) 213 if len(ws) == 0 { 214 // remove interval missing watchers 215 if ok := wg.ranges.Delete(ivl); !ok { 216 panic("could not remove watcher from interval tree") 217 } 218 } 219 220 return true 221 } 222 223 // choose selects watchers from the watcher group to update 224 func (wg *watcherGroup) choose(maxWatchers int, curRev, compactRev int64) (*watcherGroup, int64) { 225 if len(wg.watchers) < maxWatchers { 226 return wg, wg.chooseAll(curRev, compactRev) 227 } 228 ret := newWatcherGroup() 229 for w := range wg.watchers { 230 if maxWatchers <= 0 { 231 break 232 } 233 maxWatchers-- 234 ret.add(w) 235 } 236 return &ret, ret.chooseAll(curRev, compactRev) 237 } 238 239 func (wg *watcherGroup) chooseAll(curRev, compactRev int64) int64 { 240 minRev := int64(math.MaxInt64) 241 for w := range wg.watchers { 242 if w.minRev > curRev { 243 // after network partition, possibly choosing future revision watcher from restore operation 244 // with watch key "proxy-namespace__lostleader" and revision "math.MaxInt64 - 2" 245 // do not panic when such watcher had been moved from "synced" watcher during restore operation 246 if !w.restore { 247 panic(fmt.Errorf("watcher minimum revision %d should not exceed current revision %d", w.minRev, curRev)) 248 } 249 250 // mark 'restore' done, since it's chosen 251 w.restore = false 252 } 253 if w.minRev < compactRev { 254 select { 255 case w.ch <- WatchResponse{WatchID: w.id, CompactRevision: compactRev}: 256 w.compacted = true 257 wg.delete(w) 258 default: 259 // retry next time 260 } 261 continue 262 } 263 if minRev > w.minRev { 264 minRev = w.minRev 265 } 266 } 267 return minRev 268 } 269 270 // watcherSetByKey gets the set of watchers that receive events on the given key. 271 func (wg *watcherGroup) watcherSetByKey(key string) watcherSet { 272 wkeys := wg.keyWatchers[key] 273 wranges := wg.ranges.Stab(adt.NewStringAffinePoint(key)) 274 275 // zero-copy cases 276 switch { 277 case len(wranges) == 0: 278 // no need to merge ranges or copy; reuse single-key set 279 return wkeys 280 case len(wranges) == 0 && len(wkeys) == 0: 281 return nil 282 case len(wranges) == 1 && len(wkeys) == 0: 283 return wranges[0].Val.(watcherSet) 284 } 285 286 // copy case 287 ret := make(watcherSet) 288 ret.union(wg.keyWatchers[key]) 289 for _, item := range wranges { 290 ret.union(item.Val.(watcherSet)) 291 } 292 return ret 293 }