github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_sideload.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    17  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry"
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  	"github.com/cockroachdb/cockroach/pkg/util/log"
    20  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    21  	"github.com/cockroachdb/errors"
    22  	"go.etcd.io/etcd/raft/raftpb"
    23  )
    24  
// errSideloadedFileNotFound is the sentinel error for a sideloaded payload
// that is not present in storage. The SideloadStorage contract in this file
// has Get and Purge return it, and maybePurgeSideloaded tolerates it.
var errSideloadedFileNotFound = errors.New("sideloaded file not found")
    26  
// SideloadStorage is the interface used for Raft SSTable sideloading. Files
// are addressed by the (index, term) pair they were stored under.
// Implementations do not need to be thread safe.
type SideloadStorage interface {
	// Dir returns the directory in which the sideloaded files are stored. The
	// directory may or may not exist.
	Dir() string
	// Put writes the given contents to the file specified by the given index
	// and term, overwriting the file if it already exists.
	Put(_ context.Context, index, term uint64, contents []byte) error
	// Get loads the file at the given index and term. It returns
	// errSideloadedFileNotFound when no such file is present.
	Get(_ context.Context, index, term uint64) ([]byte, error)
	// Purge removes the file at the given index and term. It may also remove
	// any leftover files at the same index and earlier terms, but is not
	// required to do so. When no file at the given index and term exists, it
	// returns errSideloadedFileNotFound.
	//
	// Returns the total size of the purged payloads.
	Purge(_ context.Context, index, term uint64) (int64, error)
	// Clear removes files that may have been written by this SideloadStorage.
	Clear(context.Context) error
	// TruncateTo removes all files belonging to an index strictly smaller than
	// the given one. It returns the number of bytes freed and the number of
	// bytes in files that remain, or an error.
	TruncateTo(_ context.Context, index uint64) (freed, retained int64, _ error)
	// Filename returns an absolute path to the file whose contents Get would
	// return for the given index and term. It does not check whether the file
	// actually exists.
	Filename(_ context.Context, index, term uint64) (string, error)
}
    56  
    57  // maybeSideloadEntriesRaftMuLocked should be called with a slice of "fat"
    58  // entries before appending them to the Raft log. For those entries which are
    59  // sideloadable, this is where the actual sideloading happens: in come fat
    60  // proposals, out go thin proposals. Note that this method is to be called
    61  // before modifications are persisted to the log. The other way around is
    62  // incorrect since an ill-timed crash gives you thin proposals and no files.
    63  //
    64  // The passed-in slice is not mutated.
    65  func (r *Replica) maybeSideloadEntriesRaftMuLocked(
    66  	ctx context.Context, entriesToAppend []raftpb.Entry,
    67  ) (_ []raftpb.Entry, sideloadedEntriesSize int64, _ error) {
    68  	return maybeSideloadEntriesImpl(ctx, entriesToAppend, r.raftMu.sideloaded)
    69  }
    70  
    71  // maybeSideloadEntriesImpl iterates through the provided slice of entries. If
    72  // no sideloadable entries are found, it returns the same slice. Otherwise, it
    73  // returns a new slice in which all applicable entries have been sideloaded to
    74  // the specified SideloadStorage.
    75  func maybeSideloadEntriesImpl(
    76  	ctx context.Context, entriesToAppend []raftpb.Entry, sideloaded SideloadStorage,
    77  ) (_ []raftpb.Entry, sideloadedEntriesSize int64, _ error) {
    78  
    79  	cow := false
    80  	for i := range entriesToAppend {
    81  		if sniffSideloadedRaftCommand(entriesToAppend[i].Data) {
    82  			log.Event(ctx, "sideloading command in append")
    83  			if !cow {
    84  				// Avoid mutating the passed-in entries directly. The caller
    85  				// wants them to remain "fat".
    86  				log.Eventf(ctx, "copying entries slice of length %d", len(entriesToAppend))
    87  				cow = true
    88  				entriesToAppend = append([]raftpb.Entry(nil), entriesToAppend...)
    89  			}
    90  
    91  			ent := &entriesToAppend[i]
    92  			cmdID, data := DecodeRaftCommand(ent.Data) // cheap
    93  
    94  			// Unmarshal the command into an object that we can mutate.
    95  			var strippedCmd kvserverpb.RaftCommand
    96  			if err := protoutil.Unmarshal(data, &strippedCmd); err != nil {
    97  				return nil, 0, err
    98  			}
    99  
   100  			if strippedCmd.ReplicatedEvalResult.AddSSTable == nil {
   101  				// Still no AddSSTable; someone must've proposed a v2 command
   102  				// but not because it contains an inlined SSTable. Strange, but
   103  				// let's be future proof.
   104  				log.Warning(ctx, "encountered sideloaded Raft command without inlined payload")
   105  				continue
   106  			}
   107  
   108  			// Actually strip the command.
   109  			dataToSideload := strippedCmd.ReplicatedEvalResult.AddSSTable.Data
   110  			strippedCmd.ReplicatedEvalResult.AddSSTable.Data = nil
   111  
   112  			// Marshal the command and attach to the Raft entry.
   113  			{
   114  				data := make([]byte, raftCommandPrefixLen+strippedCmd.Size())
   115  				encodeRaftCommandPrefix(data[:raftCommandPrefixLen], raftVersionSideloaded, cmdID)
   116  				_, err := protoutil.MarshalTo(&strippedCmd, data[raftCommandPrefixLen:])
   117  				if err != nil {
   118  					return nil, 0, errors.Wrap(err, "while marshaling stripped sideloaded command")
   119  				}
   120  				ent.Data = data
   121  			}
   122  
   123  			log.Eventf(ctx, "writing payload at index=%d term=%d", ent.Index, ent.Term)
   124  			if err := sideloaded.Put(ctx, ent.Index, ent.Term, dataToSideload); err != nil {
   125  				return nil, 0, err
   126  			}
   127  			sideloadedEntriesSize += int64(len(dataToSideload))
   128  		}
   129  	}
   130  	return entriesToAppend, sideloadedEntriesSize, nil
   131  }
   132  
   133  func sniffSideloadedRaftCommand(data []byte) (sideloaded bool) {
   134  	return len(data) > 0 && data[0] == byte(raftVersionSideloaded)
   135  }
   136  
   137  // maybeInlineSideloadedRaftCommand takes an entry and inspects it. If its
   138  // command encoding version indicates a sideloaded entry, it uses the entryCache
   139  // or SideloadStorage to inline the payload, returning a new entry (which must
   140  // be treated as immutable by the caller) or nil (if inlining does not apply)
   141  //
   142  // If a payload is missing, returns an error whose Cause() is
   143  // errSideloadedFileNotFound.
   144  func maybeInlineSideloadedRaftCommand(
   145  	ctx context.Context,
   146  	rangeID roachpb.RangeID,
   147  	ent raftpb.Entry,
   148  	sideloaded SideloadStorage,
   149  	entryCache *raftentry.Cache,
   150  ) (*raftpb.Entry, error) {
   151  	if !sniffSideloadedRaftCommand(ent.Data) {
   152  		return nil, nil
   153  	}
   154  	log.Event(ctx, "inlining sideloaded SSTable")
   155  	// We could unmarshal this yet again, but if it's committed we
   156  	// are very likely to have appended it recently, in which case
   157  	// we can save work.
   158  	cachedSingleton, _, _, _ := entryCache.Scan(
   159  		nil, rangeID, ent.Index, ent.Index+1, 1<<20,
   160  	)
   161  
   162  	if len(cachedSingleton) > 0 {
   163  		log.Event(ctx, "using cache hit")
   164  		return &cachedSingleton[0], nil
   165  	}
   166  
   167  	// Make a shallow copy.
   168  	entCpy := ent
   169  	ent = entCpy
   170  
   171  	log.Event(ctx, "inlined entry not cached")
   172  	// Out of luck, for whatever reason the inlined proposal isn't in the cache.
   173  	cmdID, data := DecodeRaftCommand(ent.Data)
   174  
   175  	var command kvserverpb.RaftCommand
   176  	if err := protoutil.Unmarshal(data, &command); err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	if len(command.ReplicatedEvalResult.AddSSTable.Data) > 0 {
   181  		// The entry we started out with was already "fat". This happens when
   182  		// the entry reached us through a preemptive snapshot (when we didn't
   183  		// have a ReplicaID yet).
   184  		log.Event(ctx, "entry already inlined")
   185  		return &ent, nil
   186  	}
   187  
   188  	sideloadedData, err := sideloaded.Get(ctx, ent.Index, ent.Term)
   189  	if err != nil {
   190  		return nil, errors.Wrap(err, "loading sideloaded data")
   191  	}
   192  	command.ReplicatedEvalResult.AddSSTable.Data = sideloadedData
   193  	{
   194  		data := make([]byte, raftCommandPrefixLen+command.Size())
   195  		encodeRaftCommandPrefix(data[:raftCommandPrefixLen], raftVersionSideloaded, cmdID)
   196  		_, err := protoutil.MarshalTo(&command, data[raftCommandPrefixLen:])
   197  		if err != nil {
   198  			return nil, err
   199  		}
   200  		ent.Data = data
   201  	}
   202  	return &ent, nil
   203  }
   204  
   205  // assertSideloadedRaftCommandInlined asserts that if the provided entry is a
   206  // sideloaded entry, then its payload has already been inlined. Doing so
   207  // requires unmarshalling the raft command, so this assertion should be kept out
   208  // of performance critical paths.
   209  func assertSideloadedRaftCommandInlined(ctx context.Context, ent *raftpb.Entry) {
   210  	if !sniffSideloadedRaftCommand(ent.Data) {
   211  		return
   212  	}
   213  
   214  	var command kvserverpb.RaftCommand
   215  	_, data := DecodeRaftCommand(ent.Data)
   216  	if err := protoutil.Unmarshal(data, &command); err != nil {
   217  		log.Fatalf(ctx, "%v", err)
   218  	}
   219  
   220  	if len(command.ReplicatedEvalResult.AddSSTable.Data) == 0 {
   221  		// The entry is "thin", which is what this assertion is checking for.
   222  		log.Fatalf(ctx, "found thin sideloaded raft command: %+v", command)
   223  	}
   224  }
   225  
   226  // maybePurgeSideloaded removes [firstIndex, ..., lastIndex] at the given term
   227  // and returns the total number of bytes removed. Nonexistent entries are
   228  // silently skipped over.
   229  func maybePurgeSideloaded(
   230  	ctx context.Context, ss SideloadStorage, firstIndex, lastIndex uint64, term uint64,
   231  ) (int64, error) {
   232  	var totalSize int64
   233  	for i := firstIndex; i <= lastIndex; i++ {
   234  		size, err := ss.Purge(ctx, i, term)
   235  		if err != nil && !errors.Is(err, errSideloadedFileNotFound) {
   236  			return totalSize, err
   237  		}
   238  		totalSize += size
   239  	}
   240  	return totalSize, nil
   241  }