github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/gc/data_distribution_test.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package gc
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math/rand"
    17  	"sort"
    18  	"testing"
    19  	"time"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/keys"
    22  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer"
    23  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    24  	"github.com/cockroachdb/cockroach/pkg/storage"
    25  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    26  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    27  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    28  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    29  	"github.com/stretchr/testify/require"
    30  )
    31  
    32  // dataDistribution is an abstraction for testing that represents a stream of
    33  // MVCCKeyValues. The stream may indicate that a value is an intent by returning
    34  // a non-nil transaction. If an intent is returned it must have a higher
    35  // timestamp than any other version written for the key.
    36  type dataDistribution func() (storage.MVCCKeyValue, *roachpb.Transaction, bool)
    37  
    38  // setupTest writes the data from this distribution into eng. All data should
    39  // be a part of the range represented by desc.
    40  func (ds dataDistribution) setupTest(
    41  	t testing.TB, eng storage.Engine, desc roachpb.RangeDescriptor,
    42  ) enginepb.MVCCStats {
    43  	ctx := context.Background()
    44  	var maxTs hlc.Timestamp
    45  	var ms enginepb.MVCCStats
    46  	for {
    47  		kv, txn, ok := ds()
    48  		if !ok {
    49  			break
    50  		}
    51  		if txn == nil {
    52  			require.NoError(t, eng.Put(kv.Key, kv.Value))
    53  		} else {
    54  			// TODO(ajwerner): Decide if using MVCCPut is worth it.
    55  			ts := kv.Key.Timestamp
    56  			if txn.ReadTimestamp == (hlc.Timestamp{}) {
    57  				txn.ReadTimestamp = ts
    58  			}
    59  			if txn.WriteTimestamp == (hlc.Timestamp{}) {
    60  				txn.WriteTimestamp = ts
    61  			}
    62  			err := storage.MVCCPut(ctx, eng, &ms, kv.Key.Key, ts,
    63  				roachpb.Value{RawBytes: kv.Value}, txn)
    64  			require.NoError(t, err)
    65  		}
    66  		if !kv.Key.Timestamp.Less(maxTs) {
    67  			maxTs = kv.Key.Timestamp
    68  		}
    69  	}
    70  	require.NoError(t, eng.Flush())
    71  	snap := eng.NewSnapshot()
    72  	defer snap.Close()
    73  	ms, err := rditer.ComputeStatsForRange(&desc, snap, maxTs.WallTime)
    74  	require.NoError(t, err)
    75  	return ms
    76  }
    77  
    78  // newDataDistribution constructs a dataDistribution from various underlying
    79  // distributions.
    80  func newDataDistribution(
    81  	tsDist func() hlc.Timestamp,
    82  	keyDist func() roachpb.Key,
    83  	valueDist func() []byte,
    84  	versionsPerKey func() int,
    85  	intentFrac float64,
    86  	totalKeys int,
    87  	rng *rand.Rand,
    88  ) dataDistribution {
    89  	// TODO(ajwerner): provide a mechanism to control the rate of expired intents
    90  	// or the intent age. Such a knob would likely require decoupling intents from
    91  	// other keys.
    92  	var (
    93  		remaining  = totalKeys
    94  		key        roachpb.Key
    95  		seen       = map[string]struct{}{}
    96  		timestamps []hlc.Timestamp
    97  		haveIntent bool
    98  	)
    99  	return func() (storage.MVCCKeyValue, *roachpb.Transaction, bool) {
   100  		if remaining == 0 {
   101  			return storage.MVCCKeyValue{}, nil, false
   102  		}
   103  		defer func() { remaining-- }()
   104  		for len(timestamps) == 0 {
   105  			versions := versionsPerKey()
   106  			if versions == 0 {
   107  				continue
   108  			}
   109  			if versions > remaining {
   110  				versions = remaining
   111  			}
   112  			timestamps = make([]hlc.Timestamp, 0, versions)
   113  			for i := 0; i < versions; i++ {
   114  				timestamps = append(timestamps, tsDist())
   115  			}
   116  			sort.Slice(timestamps, func(i, j int) bool {
   117  				return timestamps[i].Less(timestamps[j])
   118  			})
   119  			for {
   120  				key = keyDist()
   121  				sk := string(key)
   122  				if _, ok := seen[sk]; ok {
   123  					continue
   124  				}
   125  				seen[sk] = struct{}{}
   126  				break
   127  			}
   128  			haveIntent = rng.Float64() < intentFrac
   129  		}
   130  		ts := timestamps[0]
   131  		timestamps = timestamps[1:]
   132  		var txn *roachpb.Transaction
   133  		if len(timestamps) == 0 && haveIntent {
   134  			txn = &roachpb.Transaction{
   135  				Status:        roachpb.PENDING,
   136  				ReadTimestamp: ts,
   137  				MaxTimestamp:  ts.Next().Next(),
   138  			}
   139  			txn.ID = uuid.MakeV4()
   140  			txn.WriteTimestamp = ts
   141  			txn.Key = keyDist()
   142  		}
   143  		return storage.MVCCKeyValue{
   144  			Key:   storage.MVCCKey{Key: key, Timestamp: ts},
   145  			Value: valueDist(),
   146  		}, txn, true
   147  	}
   148  }
   149  
   150  // distSpec abstractly represents a distribution.
   151  type distSpec interface {
   152  	dist(maxRows int, rng *rand.Rand) dataDistribution
   153  	desc() *roachpb.RangeDescriptor
   154  	String() string
   155  }
   156  
   157  // uniformDistSpec is a distSpec which represents uniform distributions over its
   158  // various dimensions.
   159  type uniformDistSpec struct {
   160  	tsFrom, tsTo                     int64 // seconds
   161  	keySuffixMin, keySuffixMax       int
   162  	valueLenMin, valueLenMax         int
   163  	deleteFrac                       float64
   164  	keysPerValueMin, keysPerValueMax int
   165  	intentFrac                       float64
   166  }
   167  
   168  var _ distSpec = uniformDistSpec{}
   169  
   170  func (ds uniformDistSpec) dist(maxRows int, rng *rand.Rand) dataDistribution {
   171  	return newDataDistribution(
   172  		uniformTimestampDistribution(ds.tsFrom*time.Second.Nanoseconds(), ds.tsTo*time.Second.Nanoseconds(), rng),
   173  		uniformTableKeyDistribution(ds.desc().StartKey.AsRawKey(), ds.keySuffixMin, ds.keySuffixMax, rng),
   174  		uniformValueDistribution(ds.valueLenMin, ds.valueLenMax, ds.deleteFrac, rng),
   175  		uniformValuesPerKey(ds.keysPerValueMin, ds.keysPerValueMax, rng),
   176  		ds.intentFrac,
   177  		maxRows,
   178  		rng,
   179  	)
   180  }
   181  
   182  func (ds uniformDistSpec) desc() *roachpb.RangeDescriptor {
   183  	tablePrefix := keys.SystemSQLCodec.TablePrefix(42)
   184  	return &roachpb.RangeDescriptor{
   185  		StartKey: roachpb.RKey(tablePrefix),
   186  		EndKey:   roachpb.RKey(tablePrefix.PrefixEnd()),
   187  	}
   188  }
   189  
   190  func (ds uniformDistSpec) String() string {
   191  	return fmt.Sprintf(
   192  		"ts=[%d,%d],"+
   193  			"keySuffix=[%d,%d],"+
   194  			"valueLen=[%d,%d],"+
   195  			"keysPerValue=[%d,%d],"+
   196  			"deleteFrac=%f,intentFrac=%f",
   197  		ds.tsFrom, ds.tsTo,
   198  		ds.keySuffixMin, ds.keySuffixMax,
   199  		ds.valueLenMin, ds.valueLenMax,
   200  		ds.keysPerValueMin, ds.keysPerValueMax,
   201  		ds.deleteFrac, ds.intentFrac)
   202  }
   203  
   204  // uniformTimestamp returns an hlc timestamp distribution with a wall time
   205  // uniform over [from, to] and a zero logical timestamp.
   206  func uniformTimestampDistribution(from, to int64, rng *rand.Rand) func() hlc.Timestamp {
   207  	if from >= to {
   208  		panic(fmt.Errorf("from (%d) >= to (%d)", from, to))
   209  	}
   210  	n := int(to-from) + 1
   211  	return func() hlc.Timestamp {
   212  		return hlc.Timestamp{WallTime: from + int64(rng.Intn(n))}
   213  	}
   214  }
   215  
   216  // returns a uniform length random value distribution.
   217  func uniformValueDistribution(min, max int, deleteFrac float64, rng *rand.Rand) func() []byte {
   218  	if min > max {
   219  		panic(fmt.Errorf("min (%d) > max (%d)", min, max))
   220  	}
   221  	n := (max - min) + 1
   222  	return func() []byte {
   223  		if rng.Float64() < deleteFrac {
   224  			return nil
   225  		}
   226  		value := make([]byte, min+rng.Intn(n))
   227  		if _, err := rng.Read(value); err != nil {
   228  			panic(err)
   229  		}
   230  		return value
   231  	}
   232  }
   233  
   234  func uniformValuesPerKey(valuesPerKeyMin, valuesPerKeyMax int, rng *rand.Rand) func() int {
   235  	if valuesPerKeyMin > valuesPerKeyMax {
   236  		panic(fmt.Errorf("min (%d) > max (%d)", valuesPerKeyMin, valuesPerKeyMax))
   237  	}
   238  	n := (valuesPerKeyMax - valuesPerKeyMin) + 1
   239  	return func() int { return valuesPerKeyMin + rng.Intn(n) }
   240  }
   241  
   242  func uniformTableKeyDistribution(
   243  	prefix roachpb.Key, suffixMin, suffixMax int, rng *rand.Rand,
   244  ) func() roachpb.Key {
   245  	if suffixMin > suffixMax {
   246  		panic(fmt.Errorf("suffixMin (%d) > suffixMax (%d)", suffixMin, suffixMax))
   247  	}
   248  	n := (suffixMax - suffixMin) + 1
   249  	return func() roachpb.Key {
   250  		randData := make([]byte, suffixMin+rng.Intn(n))
   251  		_, _ = rng.Read(randData)
   252  		return encoding.EncodeBytesAscending(prefix[0:len(prefix):len(prefix)], randData)
   253  	}
   254  }