go.etcd.io/etcd@v3.3.27+incompatible/mvcc/key_index.go (about) 1 // Copyright 2015 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mvcc 16 17 import ( 18 "bytes" 19 "errors" 20 "fmt" 21 22 "github.com/google/btree" 23 ) 24 25 var ( 26 ErrRevisionNotFound = errors.New("mvcc: revision not found") 27 ) 28 29 // keyIndex stores the revisions of a key in the backend. 30 // Each keyIndex has at least one key generation. 31 // Each generation might have several key versions. 32 // Tombstone on a key appends a tombstone version at the end 33 // of the current generation and creates a new empty generation. 34 // Each version of a key has an index pointing to the backend. 35 // 36 // For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo" 37 // generates a keyIndex: 38 // key: "foo" 39 // rev: 5 40 // generations: 41 // {empty} 42 // {4.0, 5.0(t)} 43 // {1.0, 2.0, 3.0(t)} 44 // 45 // Compacting a keyIndex removes the versions with revisions smaller than or equal to 46 // rev except the largest one. If the generation becomes empty 47 // during compaction, it will be removed. If all the generations get 48 // removed, the keyIndex should be removed. 
49 // 50 // For example: 51 // compact(2) on the previous example 52 // generations: 53 // {empty} 54 // {4.0, 5.0(t)} 55 // {2.0, 3.0(t)} 56 // 57 // compact(4) 58 // generations: 59 // {empty} 60 // {4.0, 5.0(t)} 61 // 62 // compact(5): 63 // generations: 64 // {empty} -> key SHOULD be removed. 65 // 66 // compact(6): 67 // generations: 68 // {empty} -> key SHOULD be removed. 69 type keyIndex struct { 70 key []byte 71 modified revision // the main rev of the last modification 72 generations []generation 73 } 74 75 // put puts a revision to the keyIndex. 76 func (ki *keyIndex) put(main int64, sub int64) { 77 rev := revision{main: main, sub: sub} 78 79 if !rev.GreaterThan(ki.modified) { 80 plog.Panicf("store.keyindex: put with unexpected smaller revision [%v / %v]", rev, ki.modified) 81 } 82 if len(ki.generations) == 0 { 83 ki.generations = append(ki.generations, generation{}) 84 } 85 g := &ki.generations[len(ki.generations)-1] 86 if len(g.revs) == 0 { // create a new key 87 keysGauge.Inc() 88 g.created = rev 89 } 90 g.revs = append(g.revs, rev) 91 g.ver++ 92 ki.modified = rev 93 } 94 95 func (ki *keyIndex) restore(created, modified revision, ver int64) { 96 if len(ki.generations) != 0 { 97 plog.Panicf("store.keyindex: cannot restore non-empty keyIndex") 98 } 99 100 ki.modified = modified 101 g := generation{created: created, ver: ver, revs: []revision{modified}} 102 ki.generations = append(ki.generations, g) 103 keysGauge.Inc() 104 } 105 106 // tombstone puts a revision, pointing to a tombstone, to the keyIndex. 107 // It also creates a new empty generation in the keyIndex. 108 // It returns ErrRevisionNotFound when tombstone on an empty generation. 
109 func (ki *keyIndex) tombstone(main int64, sub int64) error { 110 if ki.isEmpty() { 111 plog.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key)) 112 } 113 if ki.generations[len(ki.generations)-1].isEmpty() { 114 return ErrRevisionNotFound 115 } 116 ki.put(main, sub) 117 ki.generations = append(ki.generations, generation{}) 118 keysGauge.Dec() 119 return nil 120 } 121 122 // get gets the modified, created revision and version of the key that satisfies the given atRev. 123 // Rev must be higher than or equal to the given atRev. 124 func (ki *keyIndex) get(atRev int64) (modified, created revision, ver int64, err error) { 125 if ki.isEmpty() { 126 plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key)) 127 } 128 g := ki.findGeneration(atRev) 129 if g.isEmpty() { 130 return revision{}, revision{}, 0, ErrRevisionNotFound 131 } 132 133 n := g.walk(func(rev revision) bool { return rev.main > atRev }) 134 if n != -1 { 135 return g.revs[n], g.created, g.ver - int64(len(g.revs)-n-1), nil 136 } 137 138 return revision{}, revision{}, 0, ErrRevisionNotFound 139 } 140 141 // since returns revisions since the given rev. Only the revision with the 142 // largest sub revision will be returned if multiple revisions have the same 143 // main revision. 
144 func (ki *keyIndex) since(rev int64) []revision { 145 if ki.isEmpty() { 146 plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key)) 147 } 148 since := revision{rev, 0} 149 var gi int 150 // find the generations to start checking 151 for gi = len(ki.generations) - 1; gi > 0; gi-- { 152 g := ki.generations[gi] 153 if g.isEmpty() { 154 continue 155 } 156 if since.GreaterThan(g.created) { 157 break 158 } 159 } 160 161 var revs []revision 162 var last int64 163 for ; gi < len(ki.generations); gi++ { 164 for _, r := range ki.generations[gi].revs { 165 if since.GreaterThan(r) { 166 continue 167 } 168 if r.main == last { 169 // replace the revision with a new one that has higher sub value, 170 // because the original one should not be seen by external 171 revs[len(revs)-1] = r 172 continue 173 } 174 revs = append(revs, r) 175 last = r.main 176 } 177 } 178 return revs 179 } 180 181 // compact compacts a keyIndex by removing the versions with smaller or equal 182 // revision than the given atRev except the largest one (If the largest one is 183 // a tombstone, it will not be kept). 184 // If a generation becomes empty during compaction, it will be removed. 185 func (ki *keyIndex) compact(atRev int64, available map[revision]struct{}) { 186 if ki.isEmpty() { 187 plog.Panicf("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key)) 188 } 189 190 genIdx, revIndex := ki.doCompact(atRev, available) 191 192 g := &ki.generations[genIdx] 193 if !g.isEmpty() { 194 // remove the previous contents. 195 if revIndex != -1 { 196 g.revs = g.revs[revIndex:] 197 } 198 // remove any tombstone 199 if len(g.revs) == 1 && genIdx != len(ki.generations)-1 { 200 delete(available, g.revs[0]) 201 genIdx++ 202 } 203 } 204 205 // remove the previous generations. 206 ki.generations = ki.generations[genIdx:] 207 } 208 209 // keep finds the revision to be kept if compact is called at given atRev. 
210 func (ki *keyIndex) keep(atRev int64, available map[revision]struct{}) { 211 if ki.isEmpty() { 212 return 213 } 214 215 genIdx, revIndex := ki.doCompact(atRev, available) 216 g := &ki.generations[genIdx] 217 if !g.isEmpty() { 218 // remove any tombstone 219 if revIndex == len(g.revs)-1 && genIdx != len(ki.generations)-1 { 220 delete(available, g.revs[revIndex]) 221 } 222 } 223 } 224 225 func (ki *keyIndex) doCompact(atRev int64, available map[revision]struct{}) (genIdx int, revIndex int) { 226 // walk until reaching the first revision smaller or equal to "atRev", 227 // and add the revision to the available map 228 f := func(rev revision) bool { 229 if rev.main <= atRev { 230 available[rev] = struct{}{} 231 return false 232 } 233 return true 234 } 235 236 genIdx, g := 0, &ki.generations[0] 237 // find first generation includes atRev or created after atRev 238 for genIdx < len(ki.generations)-1 { 239 if tomb := g.revs[len(g.revs)-1].main; tomb > atRev { 240 break 241 } 242 genIdx++ 243 g = &ki.generations[genIdx] 244 } 245 246 revIndex = g.walk(f) 247 248 return genIdx, revIndex 249 } 250 251 func (ki *keyIndex) isEmpty() bool { 252 return len(ki.generations) == 1 && ki.generations[0].isEmpty() 253 } 254 255 // findGeneration finds out the generation of the keyIndex that the 256 // given rev belongs to. If the given rev is at the gap of two generations, 257 // which means that the key does not exist at the given rev, it returns nil. 
258 func (ki *keyIndex) findGeneration(rev int64) *generation { 259 lastg := len(ki.generations) - 1 260 cg := lastg 261 262 for cg >= 0 { 263 if len(ki.generations[cg].revs) == 0 { 264 cg-- 265 continue 266 } 267 g := ki.generations[cg] 268 if cg != lastg { 269 if tomb := g.revs[len(g.revs)-1].main; tomb <= rev { 270 return nil 271 } 272 } 273 if g.revs[0].main <= rev { 274 return &ki.generations[cg] 275 } 276 cg-- 277 } 278 return nil 279 } 280 281 func (a *keyIndex) Less(b btree.Item) bool { 282 return bytes.Compare(a.key, b.(*keyIndex).key) == -1 283 } 284 285 func (a *keyIndex) equal(b *keyIndex) bool { 286 if !bytes.Equal(a.key, b.key) { 287 return false 288 } 289 if a.modified != b.modified { 290 return false 291 } 292 if len(a.generations) != len(b.generations) { 293 return false 294 } 295 for i := range a.generations { 296 ag, bg := a.generations[i], b.generations[i] 297 if !ag.equal(bg) { 298 return false 299 } 300 } 301 return true 302 } 303 304 func (ki *keyIndex) String() string { 305 var s string 306 for _, g := range ki.generations { 307 s += g.String() 308 } 309 return s 310 } 311 312 // generation contains multiple revisions of a key. 313 type generation struct { 314 ver int64 315 created revision // when the generation is created (put in first revision). 316 revs []revision 317 } 318 319 func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 } 320 321 // walk walks through the revisions in the generation in descending order. 322 // It passes the revision to the given function. 323 // walk returns until: 1. it finishes walking all pairs 2. the function returns false. 324 // walk returns the position at where it stopped. If it stopped after 325 // finishing walking, -1 will be returned. 
326 func (g *generation) walk(f func(rev revision) bool) int { 327 l := len(g.revs) 328 for i := range g.revs { 329 ok := f(g.revs[l-i-1]) 330 if !ok { 331 return l - i - 1 332 } 333 } 334 return -1 335 } 336 337 func (g *generation) String() string { 338 return fmt.Sprintf("g: created[%d] ver[%d], revs %#v\n", g.created, g.ver, g.revs) 339 } 340 341 func (a generation) equal(b generation) bool { 342 if a.ver != b.ver { 343 return false 344 } 345 if len(a.revs) != len(b.revs) { 346 return false 347 } 348 349 for i := range a.revs { 350 ar, br := a.revs[i], b.revs[i] 351 if ar != br { 352 return false 353 } 354 } 355 return true 356 }