github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/gc/data_distribution_test.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package gc 12 13 import ( 14 "context" 15 "fmt" 16 "math/rand" 17 "sort" 18 "testing" 19 "time" 20 21 "github.com/cockroachdb/cockroach/pkg/keys" 22 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer" 23 "github.com/cockroachdb/cockroach/pkg/roachpb" 24 "github.com/cockroachdb/cockroach/pkg/storage" 25 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 26 "github.com/cockroachdb/cockroach/pkg/util/encoding" 27 "github.com/cockroachdb/cockroach/pkg/util/hlc" 28 "github.com/cockroachdb/cockroach/pkg/util/uuid" 29 "github.com/stretchr/testify/require" 30 ) 31 32 // dataDistribution is an abstraction for testing that represents a stream of 33 // MVCCKeyValues. The stream may indicate that a value is an intent by returning 34 // a non-nil transaction. If an intent is returned it must have a higher 35 // timestamp than any other version written for the key. 36 type dataDistribution func() (storage.MVCCKeyValue, *roachpb.Transaction, bool) 37 38 // setupTest writes the data from this distribution into eng. All data should 39 // be a part of the range represented by desc. 40 func (ds dataDistribution) setupTest( 41 t testing.TB, eng storage.Engine, desc roachpb.RangeDescriptor, 42 ) enginepb.MVCCStats { 43 ctx := context.Background() 44 var maxTs hlc.Timestamp 45 var ms enginepb.MVCCStats 46 for { 47 kv, txn, ok := ds() 48 if !ok { 49 break 50 } 51 if txn == nil { 52 require.NoError(t, eng.Put(kv.Key, kv.Value)) 53 } else { 54 // TODO(ajwerner): Decide if using MVCCPut is worth it. 55 ts := kv.Key.Timestamp 56 if txn.ReadTimestamp == (hlc.Timestamp{}) { 57 txn.ReadTimestamp = ts 58 } 59 if txn.WriteTimestamp == (hlc.Timestamp{}) { 60 txn.WriteTimestamp = ts 61 } 62 err := storage.MVCCPut(ctx, eng, &ms, kv.Key.Key, ts, 63 roachpb.Value{RawBytes: kv.Value}, txn) 64 require.NoError(t, err) 65 } 66 if !kv.Key.Timestamp.Less(maxTs) { 67 maxTs = kv.Key.Timestamp 68 } 69 } 70 require.NoError(t, eng.Flush()) 71 snap := eng.NewSnapshot() 72 defer snap.Close() 73 ms, err := rditer.ComputeStatsForRange(&desc, snap, maxTs.WallTime) 74 require.NoError(t, err) 75 return ms 76 } 77 78 // newDataDistribution constructs a dataDistribution from various underlying 79 // distributions. 80 func newDataDistribution( 81 tsDist func() hlc.Timestamp, 82 keyDist func() roachpb.Key, 83 valueDist func() []byte, 84 versionsPerKey func() int, 85 intentFrac float64, 86 totalKeys int, 87 rng *rand.Rand, 88 ) dataDistribution { 89 // TODO(ajwerner): provide a mechanism to control the rate of expired intents 90 // or the intent age. Such a knob would likely require decoupling intents from 91 // other keys. 92 var ( 93 remaining = totalKeys 94 key roachpb.Key 95 seen = map[string]struct{}{} 96 timestamps []hlc.Timestamp 97 haveIntent bool 98 ) 99 return func() (storage.MVCCKeyValue, *roachpb.Transaction, bool) { 100 if remaining == 0 { 101 return storage.MVCCKeyValue{}, nil, false 102 } 103 defer func() { remaining-- }() 104 for len(timestamps) == 0 { 105 versions := versionsPerKey() 106 if versions == 0 { 107 continue 108 } 109 if versions > remaining { 110 versions = remaining 111 } 112 timestamps = make([]hlc.Timestamp, 0, versions) 113 for i := 0; i < versions; i++ { 114 timestamps = append(timestamps, tsDist()) 115 } 116 sort.Slice(timestamps, func(i, j int) bool { 117 return timestamps[i].Less(timestamps[j]) 118 }) 119 for { 120 key = keyDist() 121 sk := string(key) 122 if _, ok := seen[sk]; ok { 123 continue 124 } 125 seen[sk] = struct{}{} 126 break 127 } 128 haveIntent = rng.Float64() < intentFrac 129 } 130 ts := timestamps[0] 131 timestamps = timestamps[1:] 132 var txn *roachpb.Transaction 133 if len(timestamps) == 0 && haveIntent { 134 txn = &roachpb.Transaction{ 135 Status: roachpb.PENDING, 136 ReadTimestamp: ts, 137 MaxTimestamp: ts.Next().Next(), 138 } 139 txn.ID = uuid.MakeV4() 140 txn.WriteTimestamp = ts 141 txn.Key = keyDist() 142 } 143 return storage.MVCCKeyValue{ 144 Key: storage.MVCCKey{Key: key, Timestamp: ts}, 145 Value: valueDist(), 146 }, txn, true 147 } 148 } 149 150 // distSpec abstractly represents a distribution. 151 type distSpec interface { 152 dist(maxRows int, rng *rand.Rand) dataDistribution 153 desc() *roachpb.RangeDescriptor 154 String() string 155 } 156 157 // uniformDistSpec is a distSpec which represents uniform distributions over its 158 // various dimensions. 159 type uniformDistSpec struct { 160 tsFrom, tsTo int64 // seconds 161 keySuffixMin, keySuffixMax int 162 valueLenMin, valueLenMax int 163 deleteFrac float64 164 keysPerValueMin, keysPerValueMax int 165 intentFrac float64 166 } 167 168 var _ distSpec = uniformDistSpec{} 169 170 func (ds uniformDistSpec) dist(maxRows int, rng *rand.Rand) dataDistribution { 171 return newDataDistribution( 172 uniformTimestampDistribution(ds.tsFrom*time.Second.Nanoseconds(), ds.tsTo*time.Second.Nanoseconds(), rng), 173 uniformTableKeyDistribution(ds.desc().StartKey.AsRawKey(), ds.keySuffixMin, ds.keySuffixMax, rng), 174 uniformValueDistribution(ds.valueLenMin, ds.valueLenMax, ds.deleteFrac, rng), 175 uniformValuesPerKey(ds.keysPerValueMin, ds.keysPerValueMax, rng), 176 ds.intentFrac, 177 maxRows, 178 rng, 179 ) 180 } 181 182 func (ds uniformDistSpec) desc() *roachpb.RangeDescriptor { 183 tablePrefix := keys.SystemSQLCodec.TablePrefix(42) 184 return &roachpb.RangeDescriptor{ 185 StartKey: roachpb.RKey(tablePrefix), 186 EndKey: roachpb.RKey(tablePrefix.PrefixEnd()), 187 } 188 } 189 190 func (ds uniformDistSpec) String() string { 191 return fmt.Sprintf( 192 "ts=[%d,%d],"+ 193 "keySuffix=[%d,%d],"+ 194 "valueLen=[%d,%d],"+ 195 "keysPerValue=[%d,%d],"+ 196 "deleteFrac=%f,intentFrac=%f", 197 ds.tsFrom, ds.tsTo, 198 ds.keySuffixMin, ds.keySuffixMax, 199 ds.valueLenMin, ds.valueLenMax, 200 ds.keysPerValueMin, ds.keysPerValueMax, 201 ds.deleteFrac, ds.intentFrac) 202 } 203 204 // uniformTimestamp returns an hlc timestamp distribution with a wall time 205 // uniform over [from, to] and a zero logical timestamp. 206 func uniformTimestampDistribution(from, to int64, rng *rand.Rand) func() hlc.Timestamp { 207 if from >= to { 208 panic(fmt.Errorf("from (%d) >= to (%d)", from, to)) 209 } 210 n := int(to-from) + 1 211 return func() hlc.Timestamp { 212 return hlc.Timestamp{WallTime: from + int64(rng.Intn(n))} 213 } 214 } 215 216 // returns a uniform length random value distribution. 217 func uniformValueDistribution(min, max int, deleteFrac float64, rng *rand.Rand) func() []byte { 218 if min > max { 219 panic(fmt.Errorf("min (%d) > max (%d)", min, max)) 220 } 221 n := (max - min) + 1 222 return func() []byte { 223 if rng.Float64() < deleteFrac { 224 return nil 225 } 226 value := make([]byte, min+rng.Intn(n)) 227 if _, err := rng.Read(value); err != nil { 228 panic(err) 229 } 230 return value 231 } 232 } 233 234 func uniformValuesPerKey(valuesPerKeyMin, valuesPerKeyMax int, rng *rand.Rand) func() int { 235 if valuesPerKeyMin > valuesPerKeyMax { 236 panic(fmt.Errorf("min (%d) > max (%d)", valuesPerKeyMin, valuesPerKeyMax)) 237 } 238 n := (valuesPerKeyMax - valuesPerKeyMin) + 1 239 return func() int { return valuesPerKeyMin + rng.Intn(n) } 240 } 241 242 func uniformTableKeyDistribution( 243 prefix roachpb.Key, suffixMin, suffixMax int, rng *rand.Rand, 244 ) func() roachpb.Key { 245 if suffixMin > suffixMax { 246 panic(fmt.Errorf("suffixMin (%d) > suffixMax (%d)", suffixMin, suffixMax)) 247 } 248 n := (suffixMax - suffixMin) + 1 249 return func() roachpb.Key { 250 randData := make([]byte, suffixMin+rng.Intn(n)) 251 _, _ = rng.Read(randData) 252 return encoding.EncodeBytesAscending(prefix[0:len(prefix):len(prefix)], randData) 253 } 254 }