github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/stateloader/stateloader.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package stateloader
    12  
    13  import (
    14  	"context"
    15  	"math"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/keys"
    18  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/storage"
    21  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    22  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    23  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    24  	"github.com/cockroachdb/cockroach/pkg/util/log"
    25  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    26  	"github.com/cockroachdb/errors"
    27  	"go.etcd.io/etcd/raft/raftpb"
    28  )
    29  
    30  // StateLoader contains accessor methods to read or write the
    31  // fields of kvserverbase.ReplicaState. It contains an internal buffer
    32  // which is reused to avoid an allocation on frequently-accessed code
    33  // paths.
    34  //
    35  // Because of this internal buffer, this struct is not safe for
    36  // concurrent use, and the return values of methods that return keys
    37  // are invalidated the next time any method is called.
    38  //
    39  // It is safe to have multiple replicaStateLoaders for the same
    40  // Replica. Reusable replicaStateLoaders are typically found in a
    41  // struct with a mutex, and temporary loaders may be created when
    42  // locking is less desirable than an allocation.
    43  type StateLoader struct {
    44  	keys.RangeIDPrefixBuf
    45  }
    46  
    47  // Make creates a a StateLoader.
    48  func Make(rangeID roachpb.RangeID) StateLoader {
    49  	rsl := StateLoader{
    50  		RangeIDPrefixBuf: keys.MakeRangeIDPrefixBuf(rangeID),
    51  	}
    52  	return rsl
    53  }
    54  
    55  // Load a ReplicaState from disk. The exception is the Desc field, which is
    56  // updated transactionally, and is populated from the supplied RangeDescriptor
    57  // under the convention that that is the latest committed version.
    58  func (rsl StateLoader) Load(
    59  	ctx context.Context, reader storage.Reader, desc *roachpb.RangeDescriptor,
    60  ) (kvserverpb.ReplicaState, error) {
    61  	var s kvserverpb.ReplicaState
    62  	// TODO(tschottdorf): figure out whether this is always synchronous with
    63  	// on-disk state (likely iffy during Split/ChangeReplica triggers).
    64  	s.Desc = protoutil.Clone(desc).(*roachpb.RangeDescriptor)
    65  	// Read the range lease.
    66  	lease, err := rsl.LoadLease(ctx, reader)
    67  	if err != nil {
    68  		return kvserverpb.ReplicaState{}, err
    69  	}
    70  	s.Lease = &lease
    71  
    72  	if s.GCThreshold, err = rsl.LoadGCThreshold(ctx, reader); err != nil {
    73  		return kvserverpb.ReplicaState{}, err
    74  	}
    75  
    76  	if as, err := rsl.LoadRangeAppliedState(ctx, reader); err != nil {
    77  		return kvserverpb.ReplicaState{}, err
    78  	} else if as != nil {
    79  		s.UsingAppliedStateKey = true
    80  
    81  		s.RaftAppliedIndex = as.RaftAppliedIndex
    82  		s.LeaseAppliedIndex = as.LeaseAppliedIndex
    83  
    84  		ms := as.RangeStats.ToStats()
    85  		s.Stats = &ms
    86  	} else {
    87  		if s.RaftAppliedIndex, s.LeaseAppliedIndex, err = rsl.LoadAppliedIndex(ctx, reader); err != nil {
    88  			return kvserverpb.ReplicaState{}, err
    89  		}
    90  
    91  		ms, err := rsl.LoadMVCCStats(ctx, reader)
    92  		if err != nil {
    93  			return kvserverpb.ReplicaState{}, err
    94  		}
    95  		s.Stats = &ms
    96  	}
    97  
    98  	// The truncated state should not be optional (i.e. the pointer is
    99  	// pointless), but it is and the migration is not worth it.
   100  	truncState, _, err := rsl.LoadRaftTruncatedState(ctx, reader)
   101  	if err != nil {
   102  		return kvserverpb.ReplicaState{}, err
   103  	}
   104  	s.TruncatedState = &truncState
   105  
   106  	return s, nil
   107  }
   108  
   109  // TruncatedStateType determines whether to use a replicated (legacy) or an
   110  // unreplicated TruncatedState. See VersionUnreplicatedRaftTruncatedStateKey.
   111  type TruncatedStateType int
   112  
   113  const (
   114  	// TruncatedStateLegacyReplicated means use the legacy (replicated) key.
   115  	TruncatedStateLegacyReplicated TruncatedStateType = iota
   116  	// TruncatedStateUnreplicated means use the new (unreplicated) key.
   117  	TruncatedStateUnreplicated
   118  )
   119  
   120  // Save persists the given ReplicaState to disk. It assumes that the contained
   121  // Stats are up-to-date and returns the stats which result from writing the
   122  // updated State.
   123  //
   124  // As an exception to the rule, the Desc field (whose on-disk state is special
   125  // in that it's a full MVCC value and updated transactionally) is only used for
   126  // its RangeID.
   127  //
   128  // TODO(tschottdorf): test and assert that none of the optional values are
   129  // missing whenever save is called. Optional values should be reserved
   130  // strictly for use in Result. Do before merge.
   131  func (rsl StateLoader) Save(
   132  	ctx context.Context,
   133  	readWriter storage.ReadWriter,
   134  	state kvserverpb.ReplicaState,
   135  	truncStateType TruncatedStateType,
   136  ) (enginepb.MVCCStats, error) {
   137  	ms := state.Stats
   138  	if err := rsl.SetLease(ctx, readWriter, ms, *state.Lease); err != nil {
   139  		return enginepb.MVCCStats{}, err
   140  	}
   141  	if err := rsl.SetGCThreshold(ctx, readWriter, ms, state.GCThreshold); err != nil {
   142  		return enginepb.MVCCStats{}, err
   143  	}
   144  	if truncStateType == TruncatedStateLegacyReplicated {
   145  		if err := rsl.SetLegacyRaftTruncatedState(ctx, readWriter, ms, state.TruncatedState); err != nil {
   146  			return enginepb.MVCCStats{}, err
   147  		}
   148  	} else {
   149  		if err := rsl.SetRaftTruncatedState(ctx, readWriter, state.TruncatedState); err != nil {
   150  			return enginepb.MVCCStats{}, err
   151  		}
   152  	}
   153  	if state.UsingAppliedStateKey {
   154  		rai, lai := state.RaftAppliedIndex, state.LeaseAppliedIndex
   155  		if err := rsl.SetRangeAppliedState(ctx, readWriter, rai, lai, ms); err != nil {
   156  			return enginepb.MVCCStats{}, err
   157  		}
   158  	} else {
   159  		if err := rsl.SetLegacyAppliedIndex(
   160  			ctx, readWriter, ms, state.RaftAppliedIndex, state.LeaseAppliedIndex,
   161  		); err != nil {
   162  			return enginepb.MVCCStats{}, err
   163  		}
   164  		if err := rsl.SetLegacyMVCCStats(ctx, readWriter, ms); err != nil {
   165  			return enginepb.MVCCStats{}, err
   166  		}
   167  	}
   168  	return *ms, nil
   169  }
   170  
   171  // LoadLease loads the lease.
   172  func (rsl StateLoader) LoadLease(
   173  	ctx context.Context, reader storage.Reader,
   174  ) (roachpb.Lease, error) {
   175  	var lease roachpb.Lease
   176  	_, err := storage.MVCCGetProto(ctx, reader, rsl.RangeLeaseKey(),
   177  		hlc.Timestamp{}, &lease, storage.MVCCGetOptions{})
   178  	return lease, err
   179  }
   180  
   181  // SetLease persists a lease.
   182  func (rsl StateLoader) SetLease(
   183  	ctx context.Context, readWriter storage.ReadWriter, ms *enginepb.MVCCStats, lease roachpb.Lease,
   184  ) error {
   185  	return storage.MVCCPutProto(ctx, readWriter, ms, rsl.RangeLeaseKey(),
   186  		hlc.Timestamp{}, nil, &lease)
   187  }
   188  
   189  // LoadRangeAppliedState loads the Range applied state. The returned pointer
   190  // will be nil if the applied state key is not found.
   191  func (rsl StateLoader) LoadRangeAppliedState(
   192  	ctx context.Context, reader storage.Reader,
   193  ) (*enginepb.RangeAppliedState, error) {
   194  	var as enginepb.RangeAppliedState
   195  	found, err := storage.MVCCGetProto(ctx, reader, rsl.RangeAppliedStateKey(), hlc.Timestamp{}, &as,
   196  		storage.MVCCGetOptions{})
   197  	if !found {
   198  		return nil, err
   199  	}
   200  	return &as, err
   201  }
   202  
   203  // AssertNoRangeAppliedState asserts that no Range applied state key is present.
   204  func (rsl StateLoader) AssertNoRangeAppliedState(ctx context.Context, reader storage.Reader) error {
   205  	if as, err := rsl.LoadRangeAppliedState(ctx, reader); err != nil {
   206  		return err
   207  	} else if as != nil {
   208  		log.Fatalf(ctx, "unexpected RangeAppliedState present: %v", as)
   209  	}
   210  	return nil
   211  }
   212  
   213  // LoadAppliedIndex returns the Raft applied index and the lease applied index.
   214  func (rsl StateLoader) LoadAppliedIndex(
   215  	ctx context.Context, reader storage.Reader,
   216  ) (raftAppliedIndex uint64, leaseAppliedIndex uint64, err error) {
   217  	// Check the applied state key.
   218  	if as, err := rsl.LoadRangeAppliedState(ctx, reader); err != nil {
   219  		return 0, 0, err
   220  	} else if as != nil {
   221  		return as.RaftAppliedIndex, as.LeaseAppliedIndex, nil
   222  	}
   223  
   224  	// If the range applied state is not found, check the legacy Raft applied
   225  	// index and the lease applied index keys. This is where these indices were
   226  	// stored before the range applied state was introduced.
   227  	v, _, err := storage.MVCCGet(ctx, reader, rsl.RaftAppliedIndexLegacyKey(),
   228  		hlc.Timestamp{}, storage.MVCCGetOptions{})
   229  	if err != nil {
   230  		return 0, 0, err
   231  	}
   232  	if v != nil {
   233  		int64AppliedIndex, err := v.GetInt()
   234  		if err != nil {
   235  			return 0, 0, err
   236  		}
   237  		raftAppliedIndex = uint64(int64AppliedIndex)
   238  	}
   239  	// TODO(tschottdorf): code duplication.
   240  	v, _, err = storage.MVCCGet(ctx, reader, rsl.LeaseAppliedIndexLegacyKey(),
   241  		hlc.Timestamp{}, storage.MVCCGetOptions{})
   242  	if err != nil {
   243  		return 0, 0, err
   244  	}
   245  	if v != nil {
   246  		int64LeaseAppliedIndex, err := v.GetInt()
   247  		if err != nil {
   248  			return 0, 0, err
   249  		}
   250  		leaseAppliedIndex = uint64(int64LeaseAppliedIndex)
   251  	}
   252  	return raftAppliedIndex, leaseAppliedIndex, nil
   253  }
   254  
   255  // LoadMVCCStats loads the MVCC stats.
   256  func (rsl StateLoader) LoadMVCCStats(
   257  	ctx context.Context, reader storage.Reader,
   258  ) (enginepb.MVCCStats, error) {
   259  	// Check the applied state key.
   260  	if as, err := rsl.LoadRangeAppliedState(ctx, reader); err != nil {
   261  		return enginepb.MVCCStats{}, err
   262  	} else if as != nil {
   263  		return as.RangeStats.ToStats(), nil
   264  	}
   265  
   266  	// If the range applied state is not found, check the legacy stats
   267  	// key. This is where stats were stored before the range applied
   268  	// state was introduced.
   269  	var ms enginepb.MVCCStats
   270  	_, err := storage.MVCCGetProto(ctx, reader, rsl.RangeStatsLegacyKey(), hlc.Timestamp{}, &ms,
   271  		storage.MVCCGetOptions{})
   272  	return ms, err
   273  }
   274  
   275  // SetRangeAppliedState overwrites the range applied state. This state is a
   276  // combination of the Raft and lease applied indices, along with the MVCC stats.
   277  //
   278  // The applied indices and the stats used to be stored separately in different
   279  // keys. We now deem those keys to be "legacy" because they have been replaced
   280  // by the range applied state key.
   281  func (rsl StateLoader) SetRangeAppliedState(
   282  	ctx context.Context,
   283  	readWriter storage.ReadWriter,
   284  	appliedIndex, leaseAppliedIndex uint64,
   285  	newMS *enginepb.MVCCStats,
   286  ) error {
   287  	as := enginepb.RangeAppliedState{
   288  		RaftAppliedIndex:  appliedIndex,
   289  		LeaseAppliedIndex: leaseAppliedIndex,
   290  		RangeStats:        newMS.ToPersistentStats(),
   291  	}
   292  	// The RangeAppliedStateKey is not included in stats. This is also reflected
   293  	// in C.MVCCComputeStats and ComputeStatsGo.
   294  	ms := (*enginepb.MVCCStats)(nil)
   295  	return storage.MVCCPutProto(ctx, readWriter, ms, rsl.RangeAppliedStateKey(), hlc.Timestamp{}, nil, &as)
   296  }
   297  
   298  // MigrateToRangeAppliedStateKey deletes the keys that were replaced by the
   299  // RangeAppliedState key.
   300  func (rsl StateLoader) MigrateToRangeAppliedStateKey(
   301  	ctx context.Context, readWriter storage.ReadWriter, ms *enginepb.MVCCStats,
   302  ) error {
   303  	noTS := hlc.Timestamp{}
   304  	if err := storage.MVCCDelete(ctx, readWriter, ms, rsl.RaftAppliedIndexLegacyKey(), noTS, nil); err != nil {
   305  		return err
   306  	}
   307  	if err := storage.MVCCDelete(ctx, readWriter, ms, rsl.LeaseAppliedIndexLegacyKey(), noTS, nil); err != nil {
   308  		return err
   309  	}
   310  	return storage.MVCCDelete(ctx, readWriter, ms, rsl.RangeStatsLegacyKey(), noTS, nil)
   311  }
   312  
   313  // SetLegacyAppliedIndex sets the legacy {raft,lease} applied index values,
   314  // properly accounting for existing keys in the returned stats.
   315  //
   316  // The range applied state key cannot already exist or an assetion will be
   317  // triggered. See comment on SetRangeAppliedState for why this is "legacy".
   318  func (rsl StateLoader) SetLegacyAppliedIndex(
   319  	ctx context.Context,
   320  	readWriter storage.ReadWriter,
   321  	ms *enginepb.MVCCStats,
   322  	appliedIndex, leaseAppliedIndex uint64,
   323  ) error {
   324  	if err := rsl.AssertNoRangeAppliedState(ctx, readWriter); err != nil {
   325  		return err
   326  	}
   327  
   328  	var value roachpb.Value
   329  	value.SetInt(int64(appliedIndex))
   330  	if err := storage.MVCCPut(ctx, readWriter, ms,
   331  		rsl.RaftAppliedIndexLegacyKey(),
   332  		hlc.Timestamp{},
   333  		value,
   334  		nil /* txn */); err != nil {
   335  		return err
   336  	}
   337  	value.SetInt(int64(leaseAppliedIndex))
   338  	return storage.MVCCPut(ctx, readWriter, ms,
   339  		rsl.LeaseAppliedIndexLegacyKey(),
   340  		hlc.Timestamp{},
   341  		value,
   342  		nil /* txn */)
   343  }
   344  
   345  // SetLegacyAppliedIndexBlind sets the legacy {raft,lease} applied index values
   346  // using a "blind" put which ignores any existing keys. This is identical to
   347  // SetLegacyAppliedIndex but is used to optimize the writing of the applied
   348  // index values during write operations where we definitively know the size of
   349  // the previous values.
   350  //
   351  // The range applied state key cannot already exist or an assetion will be
   352  // triggered. See comment on SetRangeAppliedState for why this is "legacy".
   353  func (rsl StateLoader) SetLegacyAppliedIndexBlind(
   354  	ctx context.Context,
   355  	readWriter storage.ReadWriter,
   356  	ms *enginepb.MVCCStats,
   357  	appliedIndex, leaseAppliedIndex uint64,
   358  ) error {
   359  	if err := rsl.AssertNoRangeAppliedState(ctx, readWriter); err != nil {
   360  		return err
   361  	}
   362  
   363  	var value roachpb.Value
   364  	value.SetInt(int64(appliedIndex))
   365  	if err := storage.MVCCBlindPut(ctx, readWriter, ms,
   366  		rsl.RaftAppliedIndexLegacyKey(),
   367  		hlc.Timestamp{},
   368  		value,
   369  		nil /* txn */); err != nil {
   370  		return err
   371  	}
   372  	value.SetInt(int64(leaseAppliedIndex))
   373  	return storage.MVCCBlindPut(ctx, readWriter, ms,
   374  		rsl.LeaseAppliedIndexLegacyKey(),
   375  		hlc.Timestamp{},
   376  		value,
   377  		nil /* txn */)
   378  }
   379  
   380  func inlineValueIntEncodedSize(v int64) int {
   381  	var value roachpb.Value
   382  	value.SetInt(v)
   383  	meta := enginepb.MVCCMetadata{RawBytes: value.RawBytes}
   384  	return meta.Size()
   385  }
   386  
   387  // CalcAppliedIndexSysBytes calculates the size (MVCCStats.SysBytes) of the {raft,lease} applied
   388  // index keys/values.
   389  func (rsl StateLoader) CalcAppliedIndexSysBytes(appliedIndex, leaseAppliedIndex uint64) int64 {
   390  	return int64(storage.MakeMVCCMetadataKey(rsl.RaftAppliedIndexLegacyKey()).EncodedSize() +
   391  		storage.MakeMVCCMetadataKey(rsl.LeaseAppliedIndexLegacyKey()).EncodedSize() +
   392  		inlineValueIntEncodedSize(int64(appliedIndex)) +
   393  		inlineValueIntEncodedSize(int64(leaseAppliedIndex)))
   394  }
   395  
   396  func (rsl StateLoader) writeLegacyMVCCStatsInternal(
   397  	ctx context.Context, readWriter storage.ReadWriter, newMS *enginepb.MVCCStats,
   398  ) error {
   399  	// NB: newMS is copied to prevent conditional calls to this method from
   400  	// causing the stats argument to escape. This is legacy code which does
   401  	// not need to be optimized for performance.
   402  	newMSCopy := *newMS
   403  	return storage.MVCCPutProto(ctx, readWriter, nil, rsl.RangeStatsLegacyKey(), hlc.Timestamp{}, nil, &newMSCopy)
   404  }
   405  
   406  // SetLegacyMVCCStats overwrites the legacy MVCC stats key.
   407  //
   408  // The range applied state key cannot already exist or an assetion will be
   409  // triggered. See comment on SetRangeAppliedState for why this is "legacy".
   410  func (rsl StateLoader) SetLegacyMVCCStats(
   411  	ctx context.Context, readWriter storage.ReadWriter, newMS *enginepb.MVCCStats,
   412  ) error {
   413  	if err := rsl.AssertNoRangeAppliedState(ctx, readWriter); err != nil {
   414  		return err
   415  	}
   416  
   417  	return rsl.writeLegacyMVCCStatsInternal(ctx, readWriter, newMS)
   418  }
   419  
   420  // SetMVCCStats overwrites the MVCC stats. This needs to perform a read on the
   421  // RangeAppliedState key before overwriting the stats. Use SetRangeAppliedState
   422  // when performance is important.
   423  func (rsl StateLoader) SetMVCCStats(
   424  	ctx context.Context, readWriter storage.ReadWriter, newMS *enginepb.MVCCStats,
   425  ) error {
   426  	if as, err := rsl.LoadRangeAppliedState(ctx, readWriter); err != nil {
   427  		return err
   428  	} else if as != nil {
   429  		return rsl.SetRangeAppliedState(ctx, readWriter, as.RaftAppliedIndex, as.LeaseAppliedIndex, newMS)
   430  	}
   431  
   432  	return rsl.writeLegacyMVCCStatsInternal(ctx, readWriter, newMS)
   433  }
   434  
   435  // SetLegacyRaftTruncatedState overwrites the truncated state.
   436  func (rsl StateLoader) SetLegacyRaftTruncatedState(
   437  	ctx context.Context,
   438  	readWriter storage.ReadWriter,
   439  	ms *enginepb.MVCCStats,
   440  	truncState *roachpb.RaftTruncatedState,
   441  ) error {
   442  	if (*truncState == roachpb.RaftTruncatedState{}) {
   443  		return errors.New("cannot persist empty RaftTruncatedState")
   444  	}
   445  	return storage.MVCCPutProto(ctx, readWriter, ms,
   446  		rsl.RaftTruncatedStateLegacyKey(), hlc.Timestamp{}, nil, truncState)
   447  }
   448  
   449  // LoadGCThreshold loads the GC threshold.
   450  func (rsl StateLoader) LoadGCThreshold(
   451  	ctx context.Context, reader storage.Reader,
   452  ) (*hlc.Timestamp, error) {
   453  	var t hlc.Timestamp
   454  	_, err := storage.MVCCGetProto(ctx, reader, rsl.RangeLastGCKey(),
   455  		hlc.Timestamp{}, &t, storage.MVCCGetOptions{})
   456  	return &t, err
   457  }
   458  
   459  // SetGCThreshold sets the GC threshold.
   460  func (rsl StateLoader) SetGCThreshold(
   461  	ctx context.Context,
   462  	readWriter storage.ReadWriter,
   463  	ms *enginepb.MVCCStats,
   464  	threshold *hlc.Timestamp,
   465  ) error {
   466  	if threshold == nil {
   467  		return errors.New("cannot persist nil GCThreshold")
   468  	}
   469  	return storage.MVCCPutProto(ctx, readWriter, ms,
   470  		rsl.RangeLastGCKey(), hlc.Timestamp{}, nil, threshold)
   471  }
   472  
   473  // The rest is not technically part of ReplicaState.
   474  
   475  // LoadLastIndex loads the last index.
   476  func (rsl StateLoader) LoadLastIndex(ctx context.Context, reader storage.Reader) (uint64, error) {
   477  	prefix := rsl.RaftLogPrefix()
   478  	iter := reader.NewIterator(storage.IterOptions{LowerBound: prefix})
   479  	defer iter.Close()
   480  
   481  	var lastIndex uint64
   482  	iter.SeekLT(storage.MakeMVCCMetadataKey(rsl.RaftLogKey(math.MaxUint64)))
   483  	if ok, _ := iter.Valid(); ok {
   484  		key := iter.Key()
   485  		var err error
   486  		_, lastIndex, err = encoding.DecodeUint64Ascending(key.Key[len(prefix):])
   487  		if err != nil {
   488  			log.Fatalf(ctx, "unable to decode Raft log index key: %s", key)
   489  		}
   490  	}
   491  
   492  	if lastIndex == 0 {
   493  		// The log is empty, which means we are either starting from scratch
   494  		// or the entire log has been truncated away.
   495  		lastEnt, _, err := rsl.LoadRaftTruncatedState(ctx, reader)
   496  		if err != nil {
   497  			return 0, err
   498  		}
   499  		lastIndex = lastEnt.Index
   500  	}
   501  	return lastIndex, nil
   502  }
   503  
   504  // LoadRaftTruncatedState loads the truncated state. The returned boolean returns
   505  // whether the result was read from the TruncatedStateLegacyKey. If both keys
   506  // are missing, it is false which is used to migrate into the unreplicated key.
   507  //
   508  // See VersionUnreplicatedRaftTruncatedState.
   509  func (rsl StateLoader) LoadRaftTruncatedState(
   510  	ctx context.Context, reader storage.Reader,
   511  ) (_ roachpb.RaftTruncatedState, isLegacy bool, _ error) {
   512  	var truncState roachpb.RaftTruncatedState
   513  	if found, err := storage.MVCCGetProto(
   514  		ctx, reader, rsl.RaftTruncatedStateKey(), hlc.Timestamp{}, &truncState, storage.MVCCGetOptions{},
   515  	); err != nil {
   516  		return roachpb.RaftTruncatedState{}, false, err
   517  	} else if found {
   518  		return truncState, false, nil
   519  	}
   520  
   521  	// If the "new" truncated state isn't there (yet), fall back to the legacy
   522  	// truncated state. The next log truncation will atomically rewrite them
   523  	// assuming the cluster version has advanced sufficiently.
   524  	//
   525  	// See VersionUnreplicatedRaftTruncatedState.
   526  	legacyFound, err := storage.MVCCGetProto(
   527  		ctx, reader, rsl.RaftTruncatedStateLegacyKey(), hlc.Timestamp{}, &truncState, storage.MVCCGetOptions{},
   528  	)
   529  	if err != nil {
   530  		return roachpb.RaftTruncatedState{}, false, err
   531  	}
   532  	return truncState, legacyFound, nil
   533  }
   534  
   535  // SetRaftTruncatedState overwrites the truncated state.
   536  func (rsl StateLoader) SetRaftTruncatedState(
   537  	ctx context.Context, writer storage.Writer, truncState *roachpb.RaftTruncatedState,
   538  ) error {
   539  	if (*truncState == roachpb.RaftTruncatedState{}) {
   540  		return errors.New("cannot persist empty RaftTruncatedState")
   541  	}
   542  	// "Blind" because ms == nil and timestamp == hlc.Timestamp{}.
   543  	return storage.MVCCBlindPutProto(
   544  		ctx,
   545  		writer,
   546  		nil, /* ms */
   547  		rsl.RaftTruncatedStateKey(),
   548  		hlc.Timestamp{}, /* timestamp */
   549  		truncState,
   550  		nil, /* txn */
   551  	)
   552  }
   553  
   554  // LoadHardState loads the HardState.
   555  func (rsl StateLoader) LoadHardState(
   556  	ctx context.Context, reader storage.Reader,
   557  ) (raftpb.HardState, error) {
   558  	var hs raftpb.HardState
   559  	found, err := storage.MVCCGetProto(ctx, reader, rsl.RaftHardStateKey(),
   560  		hlc.Timestamp{}, &hs, storage.MVCCGetOptions{})
   561  
   562  	if !found || err != nil {
   563  		return raftpb.HardState{}, err
   564  	}
   565  	return hs, nil
   566  }
   567  
   568  // SetHardState overwrites the HardState.
   569  func (rsl StateLoader) SetHardState(
   570  	ctx context.Context, writer storage.Writer, hs raftpb.HardState,
   571  ) error {
   572  	// "Blind" because ms == nil and timestamp == hlc.Timestamp{}.
   573  	return storage.MVCCBlindPutProto(
   574  		ctx,
   575  		writer,
   576  		nil, /* ms */
   577  		rsl.RaftHardStateKey(),
   578  		hlc.Timestamp{}, /* timestamp */
   579  		&hs,
   580  		nil, /* txn */
   581  	)
   582  }
   583  
   584  // SynthesizeRaftState creates a Raft state which synthesizes both a HardState
   585  // and a lastIndex from pre-seeded data in the engine (typically created via
   586  // writeInitialReplicaState and, on a split, perhaps the activity of an
   587  // uninitialized Raft group)
   588  func (rsl StateLoader) SynthesizeRaftState(
   589  	ctx context.Context, readWriter storage.ReadWriter,
   590  ) error {
   591  	hs, err := rsl.LoadHardState(ctx, readWriter)
   592  	if err != nil {
   593  		return err
   594  	}
   595  	truncState, _, err := rsl.LoadRaftTruncatedState(ctx, readWriter)
   596  	if err != nil {
   597  		return err
   598  	}
   599  	raftAppliedIndex, _, err := rsl.LoadAppliedIndex(ctx, readWriter)
   600  	if err != nil {
   601  		return err
   602  	}
   603  	return rsl.SynthesizeHardState(ctx, readWriter, hs, truncState, raftAppliedIndex)
   604  }
   605  
   606  // SynthesizeHardState synthesizes an on-disk HardState from the given input,
   607  // taking care that a HardState compatible with the existing data is written.
   608  func (rsl StateLoader) SynthesizeHardState(
   609  	ctx context.Context,
   610  	readWriter storage.ReadWriter,
   611  	oldHS raftpb.HardState,
   612  	truncState roachpb.RaftTruncatedState,
   613  	raftAppliedIndex uint64,
   614  ) error {
   615  	newHS := raftpb.HardState{
   616  		Term: truncState.Term,
   617  		// Note that when applying a Raft snapshot, the applied index is
   618  		// equal to the Commit index represented by the snapshot.
   619  		Commit: raftAppliedIndex,
   620  	}
   621  
   622  	if oldHS.Commit > newHS.Commit {
   623  		return errors.Newf("can't decrease HardState.Commit from %d to %d",
   624  			log.Safe(oldHS.Commit), log.Safe(newHS.Commit))
   625  	}
   626  	if oldHS.Term > newHS.Term {
   627  		// The existing HardState is allowed to be ahead of us, which is
   628  		// relevant in practice for the split trigger. We already checked above
   629  		// that we're not rewinding the acknowledged index, and we haven't
   630  		// updated votes yet.
   631  		newHS.Term = oldHS.Term
   632  	}
   633  	// If the existing HardState voted in this term, remember that.
   634  	if oldHS.Term == newHS.Term {
   635  		newHS.Vote = oldHS.Vote
   636  	}
   637  	err := rsl.SetHardState(ctx, readWriter, newHS)
   638  	return errors.Wrapf(err, "writing HardState %+v", &newHS)
   639  }