github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/physicalplan/fake_span_resolver.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package physicalplan
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"math/rand"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/keys"
    19  	"github.com/cockroachdb/cockroach/pkg/kv"
    20  	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/util/log"
    23  )
    24  
// avgRangesPerNode is the target average number of fake splits per node that a
// single Seek() call generates. The actual number of splits is additionally
// capped by how many candidate split keys the scanned span contains.
const avgRangesPerNode = 5
    26  
// fakeSpanResolver is a SpanResolver which splits spans and distributes them to
// nodes randomly. Each Seek() call generates a random distribution with
// expected avgRangesPerNode ranges for each node.
type fakeSpanResolver struct {
	// nodes is the pool of nodes from which a replica is picked at random for
	// every fake range.
	nodes []*roachpb.NodeDescriptor
}
    33  
    34  var _ SpanResolver = &fakeSpanResolver{}
    35  
    36  // NewFakeSpanResolver creates a fake span resolver.
    37  func NewFakeSpanResolver(nodes []*roachpb.NodeDescriptor) SpanResolver {
    38  	return &fakeSpanResolver{
    39  		nodes: nodes,
    40  	}
    41  }
    42  
    43  // fakeRange indicates that a range between startKey and endKey is owned by a
    44  // certain node.
    45  type fakeRange struct {
    46  	startKey roachpb.Key
    47  	endKey   roachpb.Key
    48  	replica  *roachpb.NodeDescriptor
    49  }
    50  
// fakeSpanResolverIterator is the SpanResolverIterator handed out by
// fakeSpanResolver.NewSpanResolverIterator.
type fakeSpanResolverIterator struct {
	// fsr is the resolver that created this iterator; it supplies the nodes
	// that fake ranges are assigned to.
	fsr *fakeSpanResolver
	// The fake span resolver needs to perform scans as part of Seek(); these
	// scans are performed in the context of this txn - the same one using the
	// results of the resolver - so that using the resolver doesn't introduce
	// conflicts.
	txn *kv.Txn
	// err holds the error recorded by a failed Seek(); it is what Error()
	// returns and what makes Valid() report false.
	err error

	// ranges are ordered by the key; the start key of the first one is the
	// beginning of the current range and the end key of the last one is the end
	// of the queried span. Seek() reverses the order of the boundaries when the
	// scan direction is Descending.
	ranges []fakeRange
}
    65  
    66  // NewSpanResolverIterator is part of the SpanResolver interface.
    67  func (fsr *fakeSpanResolver) NewSpanResolverIterator(txn *kv.Txn) SpanResolverIterator {
    68  	return &fakeSpanResolverIterator{fsr: fsr, txn: txn}
    69  }
    70  
    71  // Seek is part of the SpanResolverIterator interface. Each Seek call generates
    72  // a random distribution of the given span.
    73  func (fit *fakeSpanResolverIterator) Seek(
    74  	ctx context.Context, span roachpb.Span, scanDir kvcoord.ScanDirection,
    75  ) {
    76  	// Set aside the last range from the previous seek.
    77  	var prevRange fakeRange
    78  	if fit.ranges != nil {
    79  		prevRange = fit.ranges[len(fit.ranges)-1]
    80  	}
    81  
    82  	// Scan the range and keep a list of all potential split keys.
    83  	kvs, err := fit.txn.Scan(ctx, span.Key, span.EndKey, 0)
    84  	if err != nil {
    85  		log.Errorf(ctx, "Error in fake span resolver scan: %s", err)
    86  		fit.err = err
    87  		return
    88  	}
    89  
    90  	// Populate splitKeys with potential split keys; all keys are strictly
    91  	// between span.Key and span.EndKey.
    92  	var splitKeys []roachpb.Key
    93  	lastKey := span.Key
    94  	for _, kv := range kvs {
    95  		// Extract the key for the row.
    96  		splitKey, err := keys.EnsureSafeSplitKey(kv.Key)
    97  		if err != nil {
    98  			fit.err = err
    99  			return
   100  		}
   101  		if !splitKey.Equal(lastKey) && span.ContainsKey(splitKey) {
   102  			splitKeys = append(splitKeys, splitKey)
   103  			lastKey = splitKey
   104  		}
   105  	}
   106  
   107  	// Generate fake splits. The number of splits is selected randomly between 0
   108  	// and a maximum value; we want to generate
   109  	//   x = #nodes * avgRangesPerNode
   110  	// splits on average, so the maximum number is 2x:
   111  	//   Expected[ rand(2x+1) ] = (0 + 1 + 2 + .. + 2x) / (2x + 1) = x.
   112  	maxSplits := 2 * len(fit.fsr.nodes) * avgRangesPerNode
   113  	if maxSplits > len(splitKeys) {
   114  		maxSplits = len(splitKeys)
   115  	}
   116  	numSplits := rand.Intn(maxSplits + 1)
   117  
   118  	// Use Robert Floyd's algorithm to generate numSplits distinct integers
   119  	// between 0 and len(splitKeys), just because it's so cool!
   120  	chosen := make(map[int]struct{})
   121  	for j := len(splitKeys) - numSplits; j < len(splitKeys); j++ {
   122  		t := rand.Intn(j + 1)
   123  		if _, alreadyChosen := chosen[t]; !alreadyChosen {
   124  			// Insert T.
   125  			chosen[t] = struct{}{}
   126  		} else {
   127  			// Insert J.
   128  			chosen[j] = struct{}{}
   129  		}
   130  	}
   131  
   132  	splits := make([]roachpb.Key, 0, numSplits+2)
   133  	splits = append(splits, span.Key)
   134  	for i := range splitKeys {
   135  		if _, ok := chosen[i]; ok {
   136  			splits = append(splits, splitKeys[i])
   137  		}
   138  	}
   139  	splits = append(splits, span.EndKey)
   140  
   141  	if scanDir == kvcoord.Descending {
   142  		// Reverse the order of the splits.
   143  		for i := 0; i < len(splits)/2; i++ {
   144  			j := len(splits) - i - 1
   145  			splits[i], splits[j] = splits[j], splits[i]
   146  		}
   147  	}
   148  
   149  	// Build ranges corresponding to the fake splits and assign them random
   150  	// replicas.
   151  	fit.ranges = make([]fakeRange, len(splits)-1)
   152  	for i := range fit.ranges {
   153  		fit.ranges[i] = fakeRange{
   154  			startKey: splits[i],
   155  			endKey:   splits[i+1],
   156  			replica:  fit.fsr.nodes[rand.Intn(len(fit.fsr.nodes))],
   157  		}
   158  	}
   159  
   160  	// Check for the case where the last range of the previous Seek() describes
   161  	// the same row as this seek. In this case we'll assign the same replica so we
   162  	// don't "split" column families of the same row across different replicas.
   163  	if prevRange.endKey != nil {
   164  		prefix, err := keys.EnsureSafeSplitKey(span.Key)
   165  		// EnsureSafeSplitKey returns an error for keys which do not specify a
   166  		// column family. In this case we don't need to worry about splitting the
   167  		// row.
   168  		if err == nil && len(prevRange.endKey) >= len(prefix) &&
   169  			bytes.Equal(prefix, prevRange.endKey[:len(prefix)]) {
   170  			fit.ranges[0].replica = prevRange.replica
   171  		}
   172  	}
   173  }
   174  
   175  // Valid is part of the SpanResolverIterator interface.
   176  func (fit *fakeSpanResolverIterator) Valid() bool {
   177  	return fit.err == nil
   178  }
   179  
// Error is part of the SpanResolverIterator interface. It returns the error
// recorded by Seek(), or nil if none has been recorded.
func (fit *fakeSpanResolverIterator) Error() error {
	return fit.err
}
   184  
   185  // NeedAnother is part of the SpanResolverIterator interface.
   186  func (fit *fakeSpanResolverIterator) NeedAnother() bool {
   187  	return len(fit.ranges) > 1
   188  }
   189  
   190  // Next is part of the SpanResolverIterator interface.
   191  func (fit *fakeSpanResolverIterator) Next(_ context.Context) {
   192  	if len(fit.ranges) <= 1 {
   193  		panic("Next called with no more ranges")
   194  	}
   195  	fit.ranges = fit.ranges[1:]
   196  }
   197  
   198  // Desc is part of the SpanResolverIterator interface.
   199  func (fit *fakeSpanResolverIterator) Desc() roachpb.RangeDescriptor {
   200  	return roachpb.RangeDescriptor{
   201  		StartKey: roachpb.RKey(fit.ranges[0].startKey),
   202  		EndKey:   roachpb.RKey(fit.ranges[0].endKey),
   203  	}
   204  }
   205  
   206  // ReplicaInfo is part of the SpanResolverIterator interface.
   207  func (fit *fakeSpanResolverIterator) ReplicaInfo(
   208  	_ context.Context,
   209  ) (roachpb.ReplicaDescriptor, error) {
   210  	n := fit.ranges[0].replica
   211  	return roachpb.ReplicaDescriptor{NodeID: n.NodeID}, nil
   212  }