go.etcd.io/etcd@v3.3.27+incompatible/mvcc/index.go (about)

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mvcc
    16  
    17  import (
    18  	"sort"
    19  	"sync"
    20  
    21  	"github.com/google/btree"
    22  )
    23  
    24  type index interface {
    25  	Get(key []byte, atRev int64) (rev, created revision, ver int64, err error)
    26  	Range(key, end []byte, atRev int64) ([][]byte, []revision)
    27  	Revisions(key, end []byte, atRev int64) []revision
    28  	Put(key []byte, rev revision)
    29  	Tombstone(key []byte, rev revision) error
    30  	RangeSince(key, end []byte, rev int64) []revision
    31  	Compact(rev int64) map[revision]struct{}
    32  	Keep(rev int64) map[revision]struct{}
    33  	Equal(b index) bool
    34  
    35  	Insert(ki *keyIndex)
    36  	KeyIndex(ki *keyIndex) *keyIndex
    37  }
    38  
    39  type treeIndex struct {
    40  	sync.RWMutex
    41  	tree *btree.BTree
    42  }
    43  
    44  func newTreeIndex() index {
    45  	return &treeIndex{
    46  		tree: btree.New(32),
    47  	}
    48  }
    49  
    50  func (ti *treeIndex) Put(key []byte, rev revision) {
    51  	keyi := &keyIndex{key: key}
    52  
    53  	ti.Lock()
    54  	defer ti.Unlock()
    55  	item := ti.tree.Get(keyi)
    56  	if item == nil {
    57  		keyi.put(rev.main, rev.sub)
    58  		ti.tree.ReplaceOrInsert(keyi)
    59  		return
    60  	}
    61  	okeyi := item.(*keyIndex)
    62  	okeyi.put(rev.main, rev.sub)
    63  }
    64  
    65  func (ti *treeIndex) Get(key []byte, atRev int64) (modified, created revision, ver int64, err error) {
    66  	keyi := &keyIndex{key: key}
    67  	ti.RLock()
    68  	defer ti.RUnlock()
    69  	if keyi = ti.keyIndex(keyi); keyi == nil {
    70  		return revision{}, revision{}, 0, ErrRevisionNotFound
    71  	}
    72  	return keyi.get(atRev)
    73  }
    74  
    75  func (ti *treeIndex) KeyIndex(keyi *keyIndex) *keyIndex {
    76  	ti.RLock()
    77  	defer ti.RUnlock()
    78  	return ti.keyIndex(keyi)
    79  }
    80  
    81  func (ti *treeIndex) keyIndex(keyi *keyIndex) *keyIndex {
    82  	if item := ti.tree.Get(keyi); item != nil {
    83  		return item.(*keyIndex)
    84  	}
    85  	return nil
    86  }
    87  
    88  func (ti *treeIndex) visit(key, end []byte, f func(ki *keyIndex)) {
    89  	keyi, endi := &keyIndex{key: key}, &keyIndex{key: end}
    90  
    91  	ti.RLock()
    92  	defer ti.RUnlock()
    93  
    94  	ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {
    95  		if len(endi.key) > 0 && !item.Less(endi) {
    96  			return false
    97  		}
    98  		f(item.(*keyIndex))
    99  		return true
   100  	})
   101  }
   102  
   103  func (ti *treeIndex) Revisions(key, end []byte, atRev int64) (revs []revision) {
   104  	if end == nil {
   105  		rev, _, _, err := ti.Get(key, atRev)
   106  		if err != nil {
   107  			return nil
   108  		}
   109  		return []revision{rev}
   110  	}
   111  	ti.visit(key, end, func(ki *keyIndex) {
   112  		if rev, _, _, err := ki.get(atRev); err == nil {
   113  			revs = append(revs, rev)
   114  		}
   115  	})
   116  	return revs
   117  }
   118  
   119  func (ti *treeIndex) Range(key, end []byte, atRev int64) (keys [][]byte, revs []revision) {
   120  	if end == nil {
   121  		rev, _, _, err := ti.Get(key, atRev)
   122  		if err != nil {
   123  			return nil, nil
   124  		}
   125  		return [][]byte{key}, []revision{rev}
   126  	}
   127  	ti.visit(key, end, func(ki *keyIndex) {
   128  		if rev, _, _, err := ki.get(atRev); err == nil {
   129  			revs = append(revs, rev)
   130  			keys = append(keys, ki.key)
   131  		}
   132  	})
   133  	return keys, revs
   134  }
   135  
   136  func (ti *treeIndex) Tombstone(key []byte, rev revision) error {
   137  	keyi := &keyIndex{key: key}
   138  
   139  	ti.Lock()
   140  	defer ti.Unlock()
   141  	item := ti.tree.Get(keyi)
   142  	if item == nil {
   143  		return ErrRevisionNotFound
   144  	}
   145  
   146  	ki := item.(*keyIndex)
   147  	return ki.tombstone(rev.main, rev.sub)
   148  }
   149  
   150  // RangeSince returns all revisions from key(including) to end(excluding)
   151  // at or after the given rev. The returned slice is sorted in the order
   152  // of revision.
   153  func (ti *treeIndex) RangeSince(key, end []byte, rev int64) []revision {
   154  	keyi := &keyIndex{key: key}
   155  
   156  	ti.RLock()
   157  	defer ti.RUnlock()
   158  
   159  	if end == nil {
   160  		item := ti.tree.Get(keyi)
   161  		if item == nil {
   162  			return nil
   163  		}
   164  		keyi = item.(*keyIndex)
   165  		return keyi.since(rev)
   166  	}
   167  
   168  	endi := &keyIndex{key: end}
   169  	var revs []revision
   170  	ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {
   171  		if len(endi.key) > 0 && !item.Less(endi) {
   172  			return false
   173  		}
   174  		curKeyi := item.(*keyIndex)
   175  		revs = append(revs, curKeyi.since(rev)...)
   176  		return true
   177  	})
   178  	sort.Sort(revisions(revs))
   179  
   180  	return revs
   181  }
   182  
   183  func (ti *treeIndex) Compact(rev int64) map[revision]struct{} {
   184  	available := make(map[revision]struct{})
   185  	var emptyki []*keyIndex
   186  	plog.Printf("store.index: compact %d", rev)
   187  	// TODO: do not hold the lock for long time?
   188  	// This is probably OK. Compacting 10M keys takes O(10ms).
   189  	ti.Lock()
   190  	defer ti.Unlock()
   191  	ti.tree.Ascend(compactIndex(rev, available, &emptyki))
   192  	for _, ki := range emptyki {
   193  		item := ti.tree.Delete(ki)
   194  		if item == nil {
   195  			plog.Panic("store.index: unexpected delete failure during compaction")
   196  		}
   197  	}
   198  	return available
   199  }
   200  
   201  // Keep finds all revisions to be kept for a Compaction at the given rev.
   202  func (ti *treeIndex) Keep(rev int64) map[revision]struct{} {
   203  	available := make(map[revision]struct{})
   204  	ti.RLock()
   205  	defer ti.RUnlock()
   206  	ti.tree.Ascend(func(i btree.Item) bool {
   207  		keyi := i.(*keyIndex)
   208  		keyi.keep(rev, available)
   209  		return true
   210  	})
   211  	return available
   212  }
   213  
   214  func compactIndex(rev int64, available map[revision]struct{}, emptyki *[]*keyIndex) func(i btree.Item) bool {
   215  	return func(i btree.Item) bool {
   216  		keyi := i.(*keyIndex)
   217  		keyi.compact(rev, available)
   218  		if keyi.isEmpty() {
   219  			*emptyki = append(*emptyki, keyi)
   220  		}
   221  		return true
   222  	}
   223  }
   224  
   225  func (ti *treeIndex) Equal(bi index) bool {
   226  	b := bi.(*treeIndex)
   227  
   228  	if ti.tree.Len() != b.tree.Len() {
   229  		return false
   230  	}
   231  
   232  	equal := true
   233  
   234  	ti.tree.Ascend(func(item btree.Item) bool {
   235  		aki := item.(*keyIndex)
   236  		bki := b.tree.Get(item).(*keyIndex)
   237  		if !aki.equal(bki) {
   238  			equal = false
   239  			return false
   240  		}
   241  		return true
   242  	})
   243  
   244  	return equal
   245  }
   246  
   247  func (ti *treeIndex) Insert(ki *keyIndex) {
   248  	ti.Lock()
   249  	defer ti.Unlock()
   250  	ti.tree.ReplaceOrInsert(ki)
   251  }