github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/batcheval/cmd_add_sstable.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package batcheval 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/clusterversion" 17 "github.com/cockroachdb/cockroach/pkg/keys" 18 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result" 19 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" 20 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/storage" 23 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 24 "github.com/cockroachdb/cockroach/pkg/util" 25 "github.com/cockroachdb/cockroach/pkg/util/log" 26 "github.com/cockroachdb/errors" 27 "github.com/kr/pretty" 28 ) 29 30 func init() { 31 RegisterReadWriteCommand(roachpb.AddSSTable, DefaultDeclareKeys, EvalAddSSTable) 32 } 33 34 // EvalAddSSTable evaluates an AddSSTable command. 35 func EvalAddSSTable( 36 ctx context.Context, readWriter storage.ReadWriter, cArgs CommandArgs, _ roachpb.Response, 37 ) (result.Result, error) { 38 args := cArgs.Args.(*roachpb.AddSSTableRequest) 39 h := cArgs.Header 40 ms := cArgs.Stats 41 mvccStartKey, mvccEndKey := storage.MVCCKey{Key: args.Key}, storage.MVCCKey{Key: args.EndKey} 42 43 // TODO(tschottdorf): restore the below in some form (gets in the way of testing). 44 // _, span := tracing.ChildSpan(ctx, fmt.Sprintf("AddSSTable [%s,%s)", args.Key, args.EndKey)) 45 // defer tracing.FinishSpan(span) 46 log.Eventf(ctx, "evaluating AddSSTable [%s,%s)", mvccStartKey.Key, mvccEndKey.Key) 47 48 // IMPORT INTO should not proceed if any KVs from the SST shadow existing data 49 // entries - #38044. 50 var skippedKVStats enginepb.MVCCStats 51 var err error 52 if args.DisallowShadowing { 53 if skippedKVStats, err = checkForKeyCollisions(ctx, readWriter, mvccStartKey, mvccEndKey, args.Data); err != nil { 54 return result.Result{}, errors.Wrap(err, "checking for key collisions") 55 } 56 } 57 58 // Verify that the keys in the sstable are within the range specified by the 59 // request header, and if the request did not include pre-computed stats, 60 // compute the expected MVCC stats delta of ingesting the SST. 61 dataIter, err := storage.NewMemSSTIterator(args.Data, true) 62 if err != nil { 63 return result.Result{}, err 64 } 65 defer dataIter.Close() 66 67 // Check that the first key is in the expected range. 68 dataIter.SeekGE(storage.MVCCKey{Key: keys.MinKey}) 69 ok, err := dataIter.Valid() 70 if err != nil { 71 return result.Result{}, err 72 } else if ok { 73 if unsafeKey := dataIter.UnsafeKey(); unsafeKey.Less(mvccStartKey) { 74 return result.Result{}, errors.Errorf("first key %s not in request range [%s,%s)", 75 unsafeKey.Key, mvccStartKey.Key, mvccEndKey.Key) 76 } 77 } 78 79 // Get the MVCCStats for the SST being ingested. 80 var stats enginepb.MVCCStats 81 if args.MVCCStats != nil { 82 stats = *args.MVCCStats 83 } 84 85 // Stats are computed on-the-fly when shadowing of keys is disallowed. If we 86 // took the fast path and race is enabled, assert the stats were correctly 87 // computed. 88 verifyFastPath := args.DisallowShadowing && util.RaceEnabled 89 if args.MVCCStats == nil || verifyFastPath { 90 log.VEventf(ctx, 2, "computing MVCCStats for SSTable [%s,%s)", mvccStartKey.Key, mvccEndKey.Key) 91 92 computed, err := storage.ComputeStatsGo( 93 dataIter, mvccStartKey.Key, mvccEndKey.Key, h.Timestamp.WallTime) 94 if err != nil { 95 return result.Result{}, errors.Wrap(err, "computing SSTable MVCC stats") 96 } 97 98 if verifyFastPath { 99 // Update the timestamp to that of the recently computed stats to get the 100 // diff passing. 101 stats.LastUpdateNanos = computed.LastUpdateNanos 102 if !stats.Equal(computed) { 103 log.Fatalf(ctx, "fast-path MVCCStats computation gave wrong result: diff(fast, computed) = %s", 104 pretty.Diff(stats, computed)) 105 } 106 } 107 stats = computed 108 } 109 110 dataIter.SeekGE(mvccEndKey) 111 ok, err = dataIter.Valid() 112 if err != nil { 113 return result.Result{}, err 114 } else if ok { 115 return result.Result{}, errors.Errorf("last key %s not in request range [%s,%s)", 116 dataIter.UnsafeKey(), mvccStartKey.Key, mvccEndKey.Key) 117 } 118 119 // The above MVCCStats represents what is in this new SST. 120 // 121 // *If* the keys in the SST do not conflict with keys currently in this range, 122 // then adding the stats for this SST to the range stats should yield the 123 // correct overall stats. 124 // 125 // *However*, if the keys in this range *do* overlap with keys already in this 126 // range, then adding the SST semantically *replaces*, rather than adds, those 127 // keys, and the net effect on the stats is not so simple. 128 // 129 // To perfectly compute the correct net stats, you could a) determine the 130 // stats for the span of the existing range that this SST covers and subtract 131 // it from the range's stats, then b) use a merging iterator that reads from 132 // the SST and then underlying range and compute the stats of that merged 133 // span, and then add those stats back in. That would result in correct stats 134 // that reflect the merging semantics when the SST "shadows" an existing key. 135 // 136 // If the underlying range is mostly empty, this isn't terribly expensive -- 137 // computing the existing stats to subtract is cheap, as there is little or no 138 // existing data to traverse and b) is also pretty cheap -- the merging 139 // iterator can quickly iterate the in-memory SST. 140 // 141 // However, if the underlying range is _not_ empty, then this is not cheap: 142 // recomputing its stats involves traversing lots of data, and iterating the 143 // merged iterator has to constantly go back and forth to the iterator. 144 // 145 // If we assume that most SSTs don't shadow too many keys, then the error of 146 // simply adding the SST stats directly to the range stats is minimal. In the 147 // worst-case, when we retry a whole SST, then it could be overcounting the 148 // entire file, but we can hope that that is rare. In the worst case, it may 149 // cause splitting an under-filled range that would later merge when the 150 // over-count is fixed. 151 // 152 // We can indicate that these stats contain this estimation using the flag in 153 // the MVCC stats so that later re-computations will not be surprised to find 154 // any discrepancies. 155 // 156 // Callers can trigger such a re-computation to fixup any discrepancies (and 157 // remove the ContainsEstimates flag) after they are done ingesting files by 158 // sending an explicit recompute. 159 // 160 // There is a significant performance win to be achieved by ensuring that the 161 // stats computed are not estimates as it prevents recomputation on splits. 162 // Running AddSSTable with disallowShadowing=true gets us close to this as we 163 // do not allow colliding keys to be ingested. However, in the situation that 164 // two SSTs have KV(s) which "perfectly" shadow an existing key (equal ts and 165 // value), we do not consider this a collision. While the KV would just 166 // overwrite the existing data, the stats would be added to the cumulative 167 // stats of the AddSSTable command, causing a double count for such KVs. 168 // Therefore, we compute the stats for these "skipped" KVs on-the-fly while 169 // checking for the collision condition in C++ and subtract them from the 170 // stats of the SST being ingested before adding them to the running 171 // cumulative for this command. These stats can then be marked as accurate. 172 if args.DisallowShadowing { 173 stats.Subtract(skippedKVStats) 174 stats.ContainsEstimates = 0 175 } else { 176 _ = clusterversion.VersionContainsEstimatesCounter // see for info on ContainsEstimates migration 177 stats.ContainsEstimates++ 178 } 179 180 ms.Add(stats) 181 182 if args.IngestAsWrites { 183 log.VEventf(ctx, 2, "ingesting SST (%d keys/%d bytes) via regular write batch", stats.KeyCount, len(args.Data)) 184 dataIter.SeekGE(storage.MVCCKey{Key: keys.MinKey}) 185 for { 186 ok, err := dataIter.Valid() 187 if err != nil { 188 return result.Result{}, err 189 } else if !ok { 190 break 191 } 192 // NB: This is *not* a general transformation of any arbitrary SST to a 193 // WriteBatch: it assumes every key in the SST is a simple Set. This is 194 // already assumed elsewhere in this RPC though, so that's OK here. 195 if err := readWriter.Put(dataIter.UnsafeKey(), dataIter.UnsafeValue()); err != nil { 196 return result.Result{}, err 197 } 198 dataIter.Next() 199 } 200 return result.Result{}, nil 201 } 202 203 return result.Result{ 204 Replicated: kvserverpb.ReplicatedEvalResult{ 205 AddSSTable: &kvserverpb.ReplicatedEvalResult_AddSSTable{ 206 Data: args.Data, 207 CRC32: util.CRC32(args.Data), 208 }, 209 }, 210 }, nil 211 } 212 213 func checkForKeyCollisions( 214 _ context.Context, 215 readWriter storage.ReadWriter, 216 mvccStartKey storage.MVCCKey, 217 mvccEndKey storage.MVCCKey, 218 data []byte, 219 ) (enginepb.MVCCStats, error) { 220 // We could get a spansetBatch so fetch the underlying db engine as 221 // we need access to the underlying C.DBIterator later, and the 222 // dbIteratorGetter is not implemented by a spansetBatch. 223 dbEngine := spanset.GetDBEngine(readWriter, roachpb.Span{Key: mvccStartKey.Key, EndKey: mvccEndKey.Key}) 224 225 emptyMVCCStats := enginepb.MVCCStats{} 226 227 // Create iterator over the existing data. 228 existingDataIter := dbEngine.NewIterator(storage.IterOptions{UpperBound: mvccEndKey.Key}) 229 defer existingDataIter.Close() 230 existingDataIter.SeekGE(mvccStartKey) 231 if ok, err := existingDataIter.Valid(); err != nil { 232 return emptyMVCCStats, errors.Wrap(err, "checking for key collisions") 233 } else if !ok { 234 // Target key range is empty, so it is safe to ingest. 235 return emptyMVCCStats, nil 236 } 237 238 return existingDataIter.CheckForKeyCollisions(data, mvccStartKey.Key, mvccEndKey.Key) 239 }