go.etcd.io/etcd@v3.3.27+incompatible/raft/storage.go (about)

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package raft
    16  
    17  import (
    18  	"errors"
    19  	"sync"
    20  
    21  	pb "github.com/coreos/etcd/raft/raftpb"
    22  )
    23  
    24  // ErrCompacted is returned by Storage.Entries/Compact when a requested
    25  // index is unavailable because it predates the last snapshot.
    26  var ErrCompacted = errors.New("requested index is unavailable due to compaction")
    27  
    28  // ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested
    29  // index is older than the existing snapshot.
    30  var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot")
    31  
    32  // ErrUnavailable is returned by Storage interface when the requested log entries
    33  // are unavailable.
    34  var ErrUnavailable = errors.New("requested entry at index is unavailable")
    35  
    36  // ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required
    37  // snapshot is temporarily unavailable.
    38  var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable")
    39  
    40  // Storage is an interface that may be implemented by the application
    41  // to retrieve log entries from storage.
    42  //
    43  // If any Storage method returns an error, the raft instance will
    44  // become inoperable and refuse to participate in elections; the
    45  // application is responsible for cleanup and recovery in this case.
    46  type Storage interface {
    47  	// InitialState returns the saved HardState and ConfState information.
    48  	InitialState() (pb.HardState, pb.ConfState, error)
    49  	// Entries returns a slice of log entries in the range [lo,hi).
    50  	// MaxSize limits the total size of the log entries returned, but
    51  	// Entries returns at least one entry if any.
    52  	Entries(lo, hi, maxSize uint64) ([]pb.Entry, error)
    53  	// Term returns the term of entry i, which must be in the range
    54  	// [FirstIndex()-1, LastIndex()]. The term of the entry before
    55  	// FirstIndex is retained for matching purposes even though the
    56  	// rest of that entry may not be available.
    57  	Term(i uint64) (uint64, error)
    58  	// LastIndex returns the index of the last entry in the log.
    59  	LastIndex() (uint64, error)
    60  	// FirstIndex returns the index of the first log entry that is
    61  	// possibly available via Entries (older entries have been incorporated
    62  	// into the latest Snapshot; if storage only contains the dummy entry the
    63  	// first log entry is not available).
    64  	FirstIndex() (uint64, error)
    65  	// Snapshot returns the most recent snapshot.
    66  	// If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable,
    67  	// so raft state machine could know that Storage needs some time to prepare
    68  	// snapshot and call Snapshot later.
    69  	Snapshot() (pb.Snapshot, error)
    70  }
    71  
    72  // MemoryStorage implements the Storage interface backed by an
    73  // in-memory array.
    74  type MemoryStorage struct {
    75  	// Protects access to all fields. Most methods of MemoryStorage are
    76  	// run on the raft goroutine, but Append() is run on an application
    77  	// goroutine.
    78  	sync.Mutex
    79  
    80  	hardState pb.HardState
    81  	snapshot  pb.Snapshot
    82  	// ents[i] has raft log position i+snapshot.Metadata.Index
    83  	ents []pb.Entry
    84  }
    85  
    86  // NewMemoryStorage creates an empty MemoryStorage.
    87  func NewMemoryStorage() *MemoryStorage {
    88  	return &MemoryStorage{
    89  		// When starting from scratch populate the list with a dummy entry at term zero.
    90  		ents: make([]pb.Entry, 1),
    91  	}
    92  }
    93  
    94  // InitialState implements the Storage interface.
    95  func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) {
    96  	return ms.hardState, ms.snapshot.Metadata.ConfState, nil
    97  }
    98  
    99  // SetHardState saves the current HardState.
   100  func (ms *MemoryStorage) SetHardState(st pb.HardState) error {
   101  	ms.Lock()
   102  	defer ms.Unlock()
   103  	ms.hardState = st
   104  	return nil
   105  }
   106  
   107  // Entries implements the Storage interface.
   108  func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) {
   109  	ms.Lock()
   110  	defer ms.Unlock()
   111  	offset := ms.ents[0].Index
   112  	if lo <= offset {
   113  		return nil, ErrCompacted
   114  	}
   115  	if hi > ms.lastIndex()+1 {
   116  		raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex())
   117  	}
   118  	// only contains dummy entries.
   119  	if len(ms.ents) == 1 {
   120  		return nil, ErrUnavailable
   121  	}
   122  
   123  	ents := ms.ents[lo-offset : hi-offset]
   124  	return limitSize(ents, maxSize), nil
   125  }
   126  
   127  // Term implements the Storage interface.
   128  func (ms *MemoryStorage) Term(i uint64) (uint64, error) {
   129  	ms.Lock()
   130  	defer ms.Unlock()
   131  	offset := ms.ents[0].Index
   132  	if i < offset {
   133  		return 0, ErrCompacted
   134  	}
   135  	if int(i-offset) >= len(ms.ents) {
   136  		return 0, ErrUnavailable
   137  	}
   138  	return ms.ents[i-offset].Term, nil
   139  }
   140  
   141  // LastIndex implements the Storage interface.
   142  func (ms *MemoryStorage) LastIndex() (uint64, error) {
   143  	ms.Lock()
   144  	defer ms.Unlock()
   145  	return ms.lastIndex(), nil
   146  }
   147  
   148  func (ms *MemoryStorage) lastIndex() uint64 {
   149  	return ms.ents[0].Index + uint64(len(ms.ents)) - 1
   150  }
   151  
   152  // FirstIndex implements the Storage interface.
   153  func (ms *MemoryStorage) FirstIndex() (uint64, error) {
   154  	ms.Lock()
   155  	defer ms.Unlock()
   156  	return ms.firstIndex(), nil
   157  }
   158  
   159  func (ms *MemoryStorage) firstIndex() uint64 {
   160  	return ms.ents[0].Index + 1
   161  }
   162  
   163  // Snapshot implements the Storage interface.
   164  func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) {
   165  	ms.Lock()
   166  	defer ms.Unlock()
   167  	return ms.snapshot, nil
   168  }
   169  
   170  // ApplySnapshot overwrites the contents of this Storage object with
   171  // those of the given snapshot.
   172  func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error {
   173  	ms.Lock()
   174  	defer ms.Unlock()
   175  
   176  	//handle check for old snapshot being applied
   177  	msIndex := ms.snapshot.Metadata.Index
   178  	snapIndex := snap.Metadata.Index
   179  	if msIndex >= snapIndex {
   180  		return ErrSnapOutOfDate
   181  	}
   182  
   183  	ms.snapshot = snap
   184  	ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}}
   185  	return nil
   186  }
   187  
   188  // CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and
   189  // can be used to reconstruct the state at that point.
   190  // If any configuration changes have been made since the last compaction,
   191  // the result of the last ApplyConfChange must be passed in.
   192  func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) {
   193  	ms.Lock()
   194  	defer ms.Unlock()
   195  	if i <= ms.snapshot.Metadata.Index {
   196  		return pb.Snapshot{}, ErrSnapOutOfDate
   197  	}
   198  
   199  	offset := ms.ents[0].Index
   200  	if i > ms.lastIndex() {
   201  		raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex())
   202  	}
   203  
   204  	ms.snapshot.Metadata.Index = i
   205  	ms.snapshot.Metadata.Term = ms.ents[i-offset].Term
   206  	if cs != nil {
   207  		ms.snapshot.Metadata.ConfState = *cs
   208  	}
   209  	ms.snapshot.Data = data
   210  	return ms.snapshot, nil
   211  }
   212  
   213  // Compact discards all log entries prior to compactIndex.
   214  // It is the application's responsibility to not attempt to compact an index
   215  // greater than raftLog.applied.
   216  func (ms *MemoryStorage) Compact(compactIndex uint64) error {
   217  	ms.Lock()
   218  	defer ms.Unlock()
   219  	offset := ms.ents[0].Index
   220  	if compactIndex <= offset {
   221  		return ErrCompacted
   222  	}
   223  	if compactIndex > ms.lastIndex() {
   224  		raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex())
   225  	}
   226  
   227  	i := compactIndex - offset
   228  	ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i)
   229  	ents[0].Index = ms.ents[i].Index
   230  	ents[0].Term = ms.ents[i].Term
   231  	ents = append(ents, ms.ents[i+1:]...)
   232  	ms.ents = ents
   233  	return nil
   234  }
   235  
   236  // Append the new entries to storage.
   237  // TODO (xiangli): ensure the entries are continuous and
   238  // entries[0].Index > ms.entries[0].Index
   239  func (ms *MemoryStorage) Append(entries []pb.Entry) error {
   240  	if len(entries) == 0 {
   241  		return nil
   242  	}
   243  
   244  	ms.Lock()
   245  	defer ms.Unlock()
   246  
   247  	first := ms.firstIndex()
   248  	last := entries[0].Index + uint64(len(entries)) - 1
   249  
   250  	// shortcut if there is no new entry.
   251  	if last < first {
   252  		return nil
   253  	}
   254  	// truncate compacted entries
   255  	if first > entries[0].Index {
   256  		entries = entries[first-entries[0].Index:]
   257  	}
   258  
   259  	offset := entries[0].Index - ms.ents[0].Index
   260  	switch {
   261  	case uint64(len(ms.ents)) > offset:
   262  		ms.ents = append([]pb.Entry{}, ms.ents[:offset]...)
   263  		ms.ents = append(ms.ents, entries...)
   264  	case uint64(len(ms.ents)) == offset:
   265  		ms.ents = append(ms.ents, entries...)
   266  	default:
   267  		raftLogger.Panicf("missing log entry [last: %d, append at: %d]",
   268  			ms.lastIndex(), entries[0].Index)
   269  	}
   270  	return nil
   271  }