github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_sideload.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" 17 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry" 18 "github.com/cockroachdb/cockroach/pkg/roachpb" 19 "github.com/cockroachdb/cockroach/pkg/util/log" 20 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 21 "github.com/cockroachdb/errors" 22 "go.etcd.io/etcd/raft/raftpb" 23 ) 24 25 var errSideloadedFileNotFound = errors.New("sideloaded file not found") 26 27 // SideloadStorage is the interface used for Raft SSTable sideloading. 28 // Implementations do not need to be thread safe. 29 type SideloadStorage interface { 30 // The directory in which the sideloaded files are stored. May or may not 31 // exist. 32 Dir() string 33 // Writes the given contents to the file specified by the given index and 34 // term. Overwrites the file if it already exists. 35 Put(_ context.Context, index, term uint64, contents []byte) error 36 // Load the file at the given index and term. Return errSideloadedFileNotFound when no 37 // such file is present. 38 Get(_ context.Context, index, term uint64) ([]byte, error) 39 // Purge removes the file at the given index and term. It may also 40 // remove any leftover files at the same index and earlier terms, but 41 // is not required to do so. When no file at the given index and term 42 // exists, returns errSideloadedFileNotFound. 43 // 44 // Returns the total size of the purged payloads. 45 Purge(_ context.Context, index, term uint64) (int64, error) 46 // Clear files that may have been written by this SideloadStorage. 47 Clear(context.Context) error 48 // TruncateTo removes all files belonging to an index strictly smaller than 49 // the given one. Returns the number of bytes freed, the number of bytes in 50 // files that remain, or an error. 51 TruncateTo(_ context.Context, index uint64) (freed, retained int64, _ error) 52 // Returns an absolute path to the file that Get() would return the contents 53 // of. Does not check whether the file actually exists. 54 Filename(_ context.Context, index, term uint64) (string, error) 55 } 56 57 // maybeSideloadEntriesRaftMuLocked should be called with a slice of "fat" 58 // entries before appending them to the Raft log. For those entries which are 59 // sideloadable, this is where the actual sideloading happens: in come fat 60 // proposals, out go thin proposals. Note that this method is to be called 61 // before modifications are persisted to the log. The other way around is 62 // incorrect since an ill-timed crash gives you thin proposals and no files. 63 // 64 // The passed-in slice is not mutated. 65 func (r *Replica) maybeSideloadEntriesRaftMuLocked( 66 ctx context.Context, entriesToAppend []raftpb.Entry, 67 ) (_ []raftpb.Entry, sideloadedEntriesSize int64, _ error) { 68 return maybeSideloadEntriesImpl(ctx, entriesToAppend, r.raftMu.sideloaded) 69 } 70 71 // maybeSideloadEntriesImpl iterates through the provided slice of entries. If 72 // no sideloadable entries are found, it returns the same slice. Otherwise, it 73 // returns a new slice in which all applicable entries have been sideloaded to 74 // the specified SideloadStorage. 75 func maybeSideloadEntriesImpl( 76 ctx context.Context, entriesToAppend []raftpb.Entry, sideloaded SideloadStorage, 77 ) (_ []raftpb.Entry, sideloadedEntriesSize int64, _ error) { 78 79 cow := false 80 for i := range entriesToAppend { 81 if sniffSideloadedRaftCommand(entriesToAppend[i].Data) { 82 log.Event(ctx, "sideloading command in append") 83 if !cow { 84 // Avoid mutating the passed-in entries directly. The caller 85 // wants them to remain "fat". 86 log.Eventf(ctx, "copying entries slice of length %d", len(entriesToAppend)) 87 cow = true 88 entriesToAppend = append([]raftpb.Entry(nil), entriesToAppend...) 89 } 90 91 ent := &entriesToAppend[i] 92 cmdID, data := DecodeRaftCommand(ent.Data) // cheap 93 94 // Unmarshal the command into an object that we can mutate. 95 var strippedCmd kvserverpb.RaftCommand 96 if err := protoutil.Unmarshal(data, &strippedCmd); err != nil { 97 return nil, 0, err 98 } 99 100 if strippedCmd.ReplicatedEvalResult.AddSSTable == nil { 101 // Still no AddSSTable; someone must've proposed a v2 command 102 // but not because it contains an inlined SSTable. Strange, but 103 // let's be future proof. 104 log.Warning(ctx, "encountered sideloaded Raft command without inlined payload") 105 continue 106 } 107 108 // Actually strip the command. 109 dataToSideload := strippedCmd.ReplicatedEvalResult.AddSSTable.Data 110 strippedCmd.ReplicatedEvalResult.AddSSTable.Data = nil 111 112 // Marshal the command and attach to the Raft entry. 113 { 114 data := make([]byte, raftCommandPrefixLen+strippedCmd.Size()) 115 encodeRaftCommandPrefix(data[:raftCommandPrefixLen], raftVersionSideloaded, cmdID) 116 _, err := protoutil.MarshalTo(&strippedCmd, data[raftCommandPrefixLen:]) 117 if err != nil { 118 return nil, 0, errors.Wrap(err, "while marshaling stripped sideloaded command") 119 } 120 ent.Data = data 121 } 122 123 log.Eventf(ctx, "writing payload at index=%d term=%d", ent.Index, ent.Term) 124 if err := sideloaded.Put(ctx, ent.Index, ent.Term, dataToSideload); err != nil { 125 return nil, 0, err 126 } 127 sideloadedEntriesSize += int64(len(dataToSideload)) 128 } 129 } 130 return entriesToAppend, sideloadedEntriesSize, nil 131 } 132 133 func sniffSideloadedRaftCommand(data []byte) (sideloaded bool) { 134 return len(data) > 0 && data[0] == byte(raftVersionSideloaded) 135 } 136 137 // maybeInlineSideloadedRaftCommand takes an entry and inspects it. If its 138 // command encoding version indicates a sideloaded entry, it uses the entryCache 139 // or SideloadStorage to inline the payload, returning a new entry (which must 140 // be treated as immutable by the caller) or nil (if inlining does not apply) 141 // 142 // If a payload is missing, returns an error whose Cause() is 143 // errSideloadedFileNotFound. 144 func maybeInlineSideloadedRaftCommand( 145 ctx context.Context, 146 rangeID roachpb.RangeID, 147 ent raftpb.Entry, 148 sideloaded SideloadStorage, 149 entryCache *raftentry.Cache, 150 ) (*raftpb.Entry, error) { 151 if !sniffSideloadedRaftCommand(ent.Data) { 152 return nil, nil 153 } 154 log.Event(ctx, "inlining sideloaded SSTable") 155 // We could unmarshal this yet again, but if it's committed we 156 // are very likely to have appended it recently, in which case 157 // we can save work. 158 cachedSingleton, _, _, _ := entryCache.Scan( 159 nil, rangeID, ent.Index, ent.Index+1, 1<<20, 160 ) 161 162 if len(cachedSingleton) > 0 { 163 log.Event(ctx, "using cache hit") 164 return &cachedSingleton[0], nil 165 } 166 167 // Make a shallow copy. 168 entCpy := ent 169 ent = entCpy 170 171 log.Event(ctx, "inlined entry not cached") 172 // Out of luck, for whatever reason the inlined proposal isn't in the cache. 173 cmdID, data := DecodeRaftCommand(ent.Data) 174 175 var command kvserverpb.RaftCommand 176 if err := protoutil.Unmarshal(data, &command); err != nil { 177 return nil, err 178 } 179 180 if len(command.ReplicatedEvalResult.AddSSTable.Data) > 0 { 181 // The entry we started out with was already "fat". This happens when 182 // the entry reached us through a preemptive snapshot (when we didn't 183 // have a ReplicaID yet). 184 log.Event(ctx, "entry already inlined") 185 return &ent, nil 186 } 187 188 sideloadedData, err := sideloaded.Get(ctx, ent.Index, ent.Term) 189 if err != nil { 190 return nil, errors.Wrap(err, "loading sideloaded data") 191 } 192 command.ReplicatedEvalResult.AddSSTable.Data = sideloadedData 193 { 194 data := make([]byte, raftCommandPrefixLen+command.Size()) 195 encodeRaftCommandPrefix(data[:raftCommandPrefixLen], raftVersionSideloaded, cmdID) 196 _, err := protoutil.MarshalTo(&command, data[raftCommandPrefixLen:]) 197 if err != nil { 198 return nil, err 199 } 200 ent.Data = data 201 } 202 return &ent, nil 203 } 204 205 // assertSideloadedRaftCommandInlined asserts that if the provided entry is a 206 // sideloaded entry, then its payload has already been inlined. Doing so 207 // requires unmarshalling the raft command, so this assertion should be kept out 208 // of performance critical paths. 209 func assertSideloadedRaftCommandInlined(ctx context.Context, ent *raftpb.Entry) { 210 if !sniffSideloadedRaftCommand(ent.Data) { 211 return 212 } 213 214 var command kvserverpb.RaftCommand 215 _, data := DecodeRaftCommand(ent.Data) 216 if err := protoutil.Unmarshal(data, &command); err != nil { 217 log.Fatalf(ctx, "%v", err) 218 } 219 220 if len(command.ReplicatedEvalResult.AddSSTable.Data) == 0 { 221 // The entry is "thin", which is what this assertion is checking for. 222 log.Fatalf(ctx, "found thin sideloaded raft command: %+v", command) 223 } 224 } 225 226 // maybePurgeSideloaded removes [firstIndex, ..., lastIndex] at the given term 227 // and returns the total number of bytes removed. Nonexistent entries are 228 // silently skipped over. 229 func maybePurgeSideloaded( 230 ctx context.Context, ss SideloadStorage, firstIndex, lastIndex uint64, term uint64, 231 ) (int64, error) { 232 var totalSize int64 233 for i := firstIndex; i <= lastIndex; i++ { 234 size, err := ss.Purge(ctx, i, term) 235 if err != nil && !errors.Is(err, errSideloadedFileNotFound) { 236 return totalSize, err 237 } 238 totalSize += size 239 } 240 return totalSize, nil 241 }