go.etcd.io/etcd@v3.3.27+incompatible/mvcc/key_index.go (about)

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mvcc
    16  
    17  import (
    18  	"bytes"
    19  	"errors"
    20  	"fmt"
    21  
    22  	"github.com/google/btree"
    23  )
    24  
var (
	// ErrRevisionNotFound is returned by keyIndex.tombstone when the current
	// generation is empty, and by keyIndex.get when the key has no revision
	// at the requested revision.
	ErrRevisionNotFound = errors.New("mvcc: revision not found")
)
    28  
// keyIndex stores the revisions of a key in the backend.
// Each keyIndex has at least one key generation.
// Each generation might have several key versions.
// A tombstone on a key appends a tombstone version at the end
// of the current generation and creates a new empty generation.
// Each version of a key has an index pointing to the backend.
//
// For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo"
// generates a keyIndex (generations are listed newest first here; the
// generations slice itself is ordered oldest first):
// key:      "foo"
// modified: 5.0
// generations:
//    {empty}
//    {4.0, 5.0(t)}
//    {1.0, 2.0, 3.0(t)}
//
// Compacting a keyIndex removes the versions whose revision is smaller than
// or equal to rev, except the largest one. If a generation becomes empty
// during compaction, it will be removed. If all the generations get
// removed, the keyIndex should be removed.
//
// For example:
// compact(2) on the previous example
// generations:
//    {empty}
//    {4.0, 5.0(t)}
//    {2.0, 3.0(t)}
//
// compact(4)
// generations:
//    {empty}
//    {4.0, 5.0(t)}
//
// compact(5):
// generations:
//    {empty} -> key SHOULD be removed.
//
// compact(6):
// generations:
//    {empty} -> key SHOULD be removed.
type keyIndex struct {
	key         []byte       // the key whose revisions are indexed
	modified    revision     // the revision of the last modification (set by put and restore)
	generations []generation // oldest first; put appends to the last one, tombstone adds a new empty one
}
    74  
    75  // put puts a revision to the keyIndex.
    76  func (ki *keyIndex) put(main int64, sub int64) {
    77  	rev := revision{main: main, sub: sub}
    78  
    79  	if !rev.GreaterThan(ki.modified) {
    80  		plog.Panicf("store.keyindex: put with unexpected smaller revision [%v / %v]", rev, ki.modified)
    81  	}
    82  	if len(ki.generations) == 0 {
    83  		ki.generations = append(ki.generations, generation{})
    84  	}
    85  	g := &ki.generations[len(ki.generations)-1]
    86  	if len(g.revs) == 0 { // create a new key
    87  		keysGauge.Inc()
    88  		g.created = rev
    89  	}
    90  	g.revs = append(g.revs, rev)
    91  	g.ver++
    92  	ki.modified = rev
    93  }
    94  
    95  func (ki *keyIndex) restore(created, modified revision, ver int64) {
    96  	if len(ki.generations) != 0 {
    97  		plog.Panicf("store.keyindex: cannot restore non-empty keyIndex")
    98  	}
    99  
   100  	ki.modified = modified
   101  	g := generation{created: created, ver: ver, revs: []revision{modified}}
   102  	ki.generations = append(ki.generations, g)
   103  	keysGauge.Inc()
   104  }
   105  
   106  // tombstone puts a revision, pointing to a tombstone, to the keyIndex.
   107  // It also creates a new empty generation in the keyIndex.
   108  // It returns ErrRevisionNotFound when tombstone on an empty generation.
   109  func (ki *keyIndex) tombstone(main int64, sub int64) error {
   110  	if ki.isEmpty() {
   111  		plog.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key))
   112  	}
   113  	if ki.generations[len(ki.generations)-1].isEmpty() {
   114  		return ErrRevisionNotFound
   115  	}
   116  	ki.put(main, sub)
   117  	ki.generations = append(ki.generations, generation{})
   118  	keysGauge.Dec()
   119  	return nil
   120  }
   121  
   122  // get gets the modified, created revision and version of the key that satisfies the given atRev.
   123  // Rev must be higher than or equal to the given atRev.
   124  func (ki *keyIndex) get(atRev int64) (modified, created revision, ver int64, err error) {
   125  	if ki.isEmpty() {
   126  		plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
   127  	}
   128  	g := ki.findGeneration(atRev)
   129  	if g.isEmpty() {
   130  		return revision{}, revision{}, 0, ErrRevisionNotFound
   131  	}
   132  
   133  	n := g.walk(func(rev revision) bool { return rev.main > atRev })
   134  	if n != -1 {
   135  		return g.revs[n], g.created, g.ver - int64(len(g.revs)-n-1), nil
   136  	}
   137  
   138  	return revision{}, revision{}, 0, ErrRevisionNotFound
   139  }
   140  
   141  // since returns revisions since the given rev. Only the revision with the
   142  // largest sub revision will be returned if multiple revisions have the same
   143  // main revision.
   144  func (ki *keyIndex) since(rev int64) []revision {
   145  	if ki.isEmpty() {
   146  		plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
   147  	}
   148  	since := revision{rev, 0}
   149  	var gi int
   150  	// find the generations to start checking
   151  	for gi = len(ki.generations) - 1; gi > 0; gi-- {
   152  		g := ki.generations[gi]
   153  		if g.isEmpty() {
   154  			continue
   155  		}
   156  		if since.GreaterThan(g.created) {
   157  			break
   158  		}
   159  	}
   160  
   161  	var revs []revision
   162  	var last int64
   163  	for ; gi < len(ki.generations); gi++ {
   164  		for _, r := range ki.generations[gi].revs {
   165  			if since.GreaterThan(r) {
   166  				continue
   167  			}
   168  			if r.main == last {
   169  				// replace the revision with a new one that has higher sub value,
   170  				// because the original one should not be seen by external
   171  				revs[len(revs)-1] = r
   172  				continue
   173  			}
   174  			revs = append(revs, r)
   175  			last = r.main
   176  		}
   177  	}
   178  	return revs
   179  }
   180  
   181  // compact compacts a keyIndex by removing the versions with smaller or equal
   182  // revision than the given atRev except the largest one (If the largest one is
   183  // a tombstone, it will not be kept).
   184  // If a generation becomes empty during compaction, it will be removed.
   185  func (ki *keyIndex) compact(atRev int64, available map[revision]struct{}) {
   186  	if ki.isEmpty() {
   187  		plog.Panicf("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key))
   188  	}
   189  
   190  	genIdx, revIndex := ki.doCompact(atRev, available)
   191  
   192  	g := &ki.generations[genIdx]
   193  	if !g.isEmpty() {
   194  		// remove the previous contents.
   195  		if revIndex != -1 {
   196  			g.revs = g.revs[revIndex:]
   197  		}
   198  		// remove any tombstone
   199  		if len(g.revs) == 1 && genIdx != len(ki.generations)-1 {
   200  			delete(available, g.revs[0])
   201  			genIdx++
   202  		}
   203  	}
   204  
   205  	// remove the previous generations.
   206  	ki.generations = ki.generations[genIdx:]
   207  }
   208  
   209  // keep finds the revision to be kept if compact is called at given atRev.
   210  func (ki *keyIndex) keep(atRev int64, available map[revision]struct{}) {
   211  	if ki.isEmpty() {
   212  		return
   213  	}
   214  
   215  	genIdx, revIndex := ki.doCompact(atRev, available)
   216  	g := &ki.generations[genIdx]
   217  	if !g.isEmpty() {
   218  		// remove any tombstone
   219  		if revIndex == len(g.revs)-1 && genIdx != len(ki.generations)-1 {
   220  			delete(available, g.revs[revIndex])
   221  		}
   222  	}
   223  }
   224  
   225  func (ki *keyIndex) doCompact(atRev int64, available map[revision]struct{}) (genIdx int, revIndex int) {
   226  	// walk until reaching the first revision smaller or equal to "atRev",
   227  	// and add the revision to the available map
   228  	f := func(rev revision) bool {
   229  		if rev.main <= atRev {
   230  			available[rev] = struct{}{}
   231  			return false
   232  		}
   233  		return true
   234  	}
   235  
   236  	genIdx, g := 0, &ki.generations[0]
   237  	// find first generation includes atRev or created after atRev
   238  	for genIdx < len(ki.generations)-1 {
   239  		if tomb := g.revs[len(g.revs)-1].main; tomb > atRev {
   240  			break
   241  		}
   242  		genIdx++
   243  		g = &ki.generations[genIdx]
   244  	}
   245  
   246  	revIndex = g.walk(f)
   247  
   248  	return genIdx, revIndex
   249  }
   250  
   251  func (ki *keyIndex) isEmpty() bool {
   252  	return len(ki.generations) == 1 && ki.generations[0].isEmpty()
   253  }
   254  
   255  // findGeneration finds out the generation of the keyIndex that the
   256  // given rev belongs to. If the given rev is at the gap of two generations,
   257  // which means that the key does not exist at the given rev, it returns nil.
   258  func (ki *keyIndex) findGeneration(rev int64) *generation {
   259  	lastg := len(ki.generations) - 1
   260  	cg := lastg
   261  
   262  	for cg >= 0 {
   263  		if len(ki.generations[cg].revs) == 0 {
   264  			cg--
   265  			continue
   266  		}
   267  		g := ki.generations[cg]
   268  		if cg != lastg {
   269  			if tomb := g.revs[len(g.revs)-1].main; tomb <= rev {
   270  				return nil
   271  			}
   272  		}
   273  		if g.revs[0].main <= rev {
   274  			return &ki.generations[cg]
   275  		}
   276  		cg--
   277  	}
   278  	return nil
   279  }
   280  
   281  func (a *keyIndex) Less(b btree.Item) bool {
   282  	return bytes.Compare(a.key, b.(*keyIndex).key) == -1
   283  }
   284  
   285  func (a *keyIndex) equal(b *keyIndex) bool {
   286  	if !bytes.Equal(a.key, b.key) {
   287  		return false
   288  	}
   289  	if a.modified != b.modified {
   290  		return false
   291  	}
   292  	if len(a.generations) != len(b.generations) {
   293  		return false
   294  	}
   295  	for i := range a.generations {
   296  		ag, bg := a.generations[i], b.generations[i]
   297  		if !ag.equal(bg) {
   298  			return false
   299  		}
   300  	}
   301  	return true
   302  }
   303  
   304  func (ki *keyIndex) String() string {
   305  	var s string
   306  	for _, g := range ki.generations {
   307  		s += g.String()
   308  	}
   309  	return s
   310  }
   311  
// generation contains multiple revisions of a key.
type generation struct {
	ver     int64      // incremented by every put into the generation; not decreased when compact trims revs
	created revision   // when the generation is created (put in first revision).
	revs    []revision // revisions in the order they were put; walk visits them from last to first
}
   318  
   319  func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 }
   320  
   321  // walk walks through the revisions in the generation in descending order.
   322  // It passes the revision to the given function.
   323  // walk returns until: 1. it finishes walking all pairs 2. the function returns false.
   324  // walk returns the position at where it stopped. If it stopped after
   325  // finishing walking, -1 will be returned.
   326  func (g *generation) walk(f func(rev revision) bool) int {
   327  	l := len(g.revs)
   328  	for i := range g.revs {
   329  		ok := f(g.revs[l-i-1])
   330  		if !ok {
   331  			return l - i - 1
   332  		}
   333  	}
   334  	return -1
   335  }
   336  
   337  func (g *generation) String() string {
   338  	return fmt.Sprintf("g: created[%d] ver[%d], revs %#v\n", g.created, g.ver, g.revs)
   339  }
   340  
   341  func (a generation) equal(b generation) bool {
   342  	if a.ver != b.ver {
   343  		return false
   344  	}
   345  	if len(a.revs) != len(b.revs) {
   346  		return false
   347  	}
   348  
   349  	for i := range a.revs {
   350  		ar, br := a.revs[i], b.revs[i]
   351  		if ar != br {
   352  			return false
   353  		}
   354  	}
   355  	return true
   356  }