github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/row/kv_batch_fetcher.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package row
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/kv"
    19  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
    20  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    22  	"github.com/cockroachdb/cockroach/pkg/util"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
    27  // kvBatchSize is the number of keys we request at a time.
    28  // On a single node, 1000 was enough to avoid any performance degradation. On
    29  // multi-node clusters, we want bigger chunks to make up for the higher latency.
    30  // TODO(radu): parameters like this should be configurable
    31  var kvBatchSize int64 = 10000
    32  
    33  // TestingSetKVBatchSize changes the kvBatchFetcher batch size, and returns a function that restores it.
    34  // This is to be used only in tests - we have no test coverage for arbitrary kv batch sizes at this time.
    35  func TestingSetKVBatchSize(val int64) func() {
    36  	oldVal := kvBatchSize
    37  	kvBatchSize = val
    38  	return func() { kvBatchSize = oldVal }
    39  }
    40  
    41  // sendFunc is the function used to execute a KV batch; normally
    42  // wraps (*client.Txn).Send.
    43  type sendFunc func(
    44  	ctx context.Context, ba roachpb.BatchRequest,
    45  ) (*roachpb.BatchResponse, error)
    46  
    47  // txnKVFetcher handles retrieval of key/values.
    48  type txnKVFetcher struct {
    49  	// "Constant" fields, provided by the caller.
    50  	sendFn sendFunc
    51  	spans  roachpb.Spans
    52  	// If useBatchLimit is true, batches are limited to kvBatchSize. If
    53  	// firstBatchLimit is also set, the first batch is limited to that value.
    54  	// Subsequent batches are larger, up to kvBatchSize.
    55  	firstBatchLimit int64
    56  	useBatchLimit   bool
    57  	reverse         bool
    58  	// lockStr represents the locking mode to use when fetching KVs.
    59  	lockStr sqlbase.ScanLockingStrength
    60  	// returnRangeInfo, if set, causes the kvBatchFetcher to populate rangeInfos.
    61  	// See also rowFetcher.returnRangeInfo.
    62  	returnRangeInfo bool
    63  
    64  	fetchEnd bool
    65  	batchIdx int
    66  
    67  	// requestSpans contains the spans that were requested in the last request,
    68  	// and is one to one with responses. This field is kept separately from spans
    69  	// so that the fetcher can keep track of which response was produced for each
    70  	// input span.
    71  	requestSpans roachpb.Spans
    72  	responses    []roachpb.ResponseUnion
    73  
    74  	// As the kvBatchFetcher fetches batches of kvs, it accumulates information on the
    75  	// replicas where the batches came from. This info can be retrieved through
    76  	// getRangeInfo(), to be used for updating caches.
    77  	// rangeInfos are deduped, so they're not ordered in any particular way and
    78  	// they don't map to kvBatchFetcher.spans in any particular way.
    79  	rangeInfos       []roachpb.RangeInfo
    80  	origSpan         roachpb.Span
    81  	remainingBatches [][]byte
    82  }
    83  
    84  var _ kvBatchFetcher = &txnKVFetcher{}
    85  
    86  func (f *txnKVFetcher) GetRangesInfo() []roachpb.RangeInfo {
    87  	if !f.returnRangeInfo {
    88  		panic(errors.AssertionFailedf("GetRangesInfo() called on kvBatchFetcher that wasn't configured with returnRangeInfo"))
    89  	}
    90  	return f.rangeInfos
    91  }
    92  
    93  // getBatchSize returns the max size of the next batch.
    94  func (f *txnKVFetcher) getBatchSize() int64 {
    95  	return f.getBatchSizeForIdx(f.batchIdx)
    96  }
    97  
    98  func (f *txnKVFetcher) getBatchSizeForIdx(batchIdx int) int64 {
    99  	if !f.useBatchLimit {
   100  		return 0
   101  	}
   102  	if f.firstBatchLimit == 0 || f.firstBatchLimit >= kvBatchSize {
   103  		return kvBatchSize
   104  	}
   105  
   106  	// We grab the first batch according to the limit. If it turns out that we
   107  	// need another batch, we grab a bigger batch. If that's still not enough,
   108  	// we revert to the default batch size.
   109  	switch batchIdx {
   110  	case 0:
   111  		return f.firstBatchLimit
   112  
   113  	case 1:
   114  		// Make the second batch 10 times larger (but at most the default batch
   115  		// size and at least 1/10 of the default batch size). Sample
   116  		// progressions of batch sizes:
   117  		//
   118  		//  First batch | Second batch | Subsequent batches
   119  		//  -----------------------------------------------
   120  		//         1    |     1,000     |     10,000
   121  		//       100    |     1,000     |     10,000
   122  		//       500    |     5,000     |     10,000
   123  		//      1000    |    10,000     |     10,000
   124  		secondBatch := f.firstBatchLimit * 10
   125  		switch {
   126  		case secondBatch < kvBatchSize/10:
   127  			return kvBatchSize / 10
   128  		case secondBatch > kvBatchSize:
   129  			return kvBatchSize
   130  		default:
   131  			return secondBatch
   132  		}
   133  
   134  	default:
   135  		return kvBatchSize
   136  	}
   137  }
   138  
   139  // getKeyLockingStrength returns the configured per-key locking strength to use
   140  // for key-value scans.
   141  func (f *txnKVFetcher) getKeyLockingStrength() lock.Strength {
   142  	switch f.lockStr {
   143  	case sqlbase.ScanLockingStrength_FOR_NONE:
   144  		return lock.None
   145  
   146  	case sqlbase.ScanLockingStrength_FOR_KEY_SHARE:
   147  		// Promote to FOR_SHARE.
   148  		fallthrough
   149  	case sqlbase.ScanLockingStrength_FOR_SHARE:
   150  		// We currently perform no per-key locking when FOR_SHARE is used
   151  		// because Shared locks have not yet been implemented.
   152  		return lock.None
   153  
   154  	case sqlbase.ScanLockingStrength_FOR_NO_KEY_UPDATE:
   155  		// Promote to FOR_UPDATE.
   156  		fallthrough
   157  	case sqlbase.ScanLockingStrength_FOR_UPDATE:
   158  		// We currently perform exclusive per-key locking when FOR_UPDATE is
   159  		// used because Upgrade locks have not yet been implemented.
   160  		return lock.Exclusive
   161  
   162  	default:
   163  		panic(fmt.Sprintf("unknown locking strength %s", f.lockStr))
   164  	}
   165  }
   166  
   167  // makeKVBatchFetcher initializes a kvBatchFetcher for the given spans.
   168  //
   169  // If useBatchLimit is true, batches are limited to kvBatchSize. If
   170  // firstBatchLimit is also set, the first batch is limited to that value.
   171  // Subsequent batches are larger, up to kvBatchSize.
   172  //
   173  // Batch limits can only be used if the spans are ordered.
   174  func makeKVBatchFetcher(
   175  	txn *kv.Txn,
   176  	spans roachpb.Spans,
   177  	reverse bool,
   178  	useBatchLimit bool,
   179  	firstBatchLimit int64,
   180  	lockStr sqlbase.ScanLockingStrength,
   181  	returnRangeInfo bool,
   182  ) (txnKVFetcher, error) {
   183  	sendFn := func(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) {
   184  		res, err := txn.Send(ctx, ba)
   185  		if err != nil {
   186  			return nil, err.GoError()
   187  		}
   188  		return res, nil
   189  	}
   190  	return makeKVBatchFetcherWithSendFunc(
   191  		sendFn, spans, reverse, useBatchLimit, firstBatchLimit, lockStr, returnRangeInfo,
   192  	)
   193  }
   194  
   195  // makeKVBatchFetcherWithSendFunc is like makeKVBatchFetcher but uses a custom
   196  // send function.
   197  func makeKVBatchFetcherWithSendFunc(
   198  	sendFn sendFunc,
   199  	spans roachpb.Spans,
   200  	reverse bool,
   201  	useBatchLimit bool,
   202  	firstBatchLimit int64,
   203  	lockStr sqlbase.ScanLockingStrength,
   204  	returnRangeInfo bool,
   205  ) (txnKVFetcher, error) {
   206  	if firstBatchLimit < 0 || (!useBatchLimit && firstBatchLimit != 0) {
   207  		return txnKVFetcher{}, errors.Errorf("invalid batch limit %d (useBatchLimit: %t)",
   208  			firstBatchLimit, useBatchLimit)
   209  	}
   210  
   211  	if useBatchLimit {
   212  		// Verify the spans are ordered if a batch limit is used.
   213  		for i := 1; i < len(spans); i++ {
   214  			if spans[i].Key.Compare(spans[i-1].EndKey) < 0 {
   215  				return txnKVFetcher{}, errors.Errorf("unordered spans (%s %s)", spans[i-1], spans[i])
   216  			}
   217  		}
   218  	} else if util.RaceEnabled {
   219  		// Otherwise, just verify the spans don't contain consecutive overlapping
   220  		// spans.
   221  		for i := 1; i < len(spans); i++ {
   222  			if spans[i].Key.Compare(spans[i-1].EndKey) >= 0 {
   223  				// Current span's start key is greater than or equal to the last span's
   224  				// end key - we're good.
   225  				continue
   226  			} else if spans[i].EndKey.Compare(spans[i-1].Key) < 0 {
   227  				// Current span's end key is less than or equal to the last span's start
   228  				// key - also good.
   229  				continue
   230  			}
   231  			// Otherwise, the two spans overlap, which isn't allowed - it leaves us at
   232  			// risk of incorrect results, since the row fetcher can't distinguish
   233  			// between identical rows in two different batches.
   234  			return txnKVFetcher{}, errors.Errorf("overlapping neighbor spans (%s %s)", spans[i-1], spans[i])
   235  		}
   236  	}
   237  
   238  	// Make a copy of the spans because we update them.
   239  	copySpans := make(roachpb.Spans, len(spans))
   240  	for i := range spans {
   241  		if reverse {
   242  			// Reverse scans receive the spans in decreasing order.
   243  			copySpans[len(spans)-i-1] = spans[i]
   244  		} else {
   245  			copySpans[i] = spans[i]
   246  		}
   247  	}
   248  
   249  	return txnKVFetcher{
   250  		sendFn:          sendFn,
   251  		spans:           copySpans,
   252  		reverse:         reverse,
   253  		useBatchLimit:   useBatchLimit,
   254  		firstBatchLimit: firstBatchLimit,
   255  		lockStr:         lockStr,
   256  		returnRangeInfo: returnRangeInfo,
   257  	}, nil
   258  }
   259  
   260  // fetch retrieves spans from the kv
   261  func (f *txnKVFetcher) fetch(ctx context.Context) error {
   262  	var ba roachpb.BatchRequest
   263  	ba.Header.MaxSpanRequestKeys = f.getBatchSize()
   264  	if ba.Header.MaxSpanRequestKeys > 0 {
   265  		// If this kvfetcher limits the number of rows returned, also use
   266  		// target bytes to guard against the case in which the average row
   267  		// is very large.
   268  		// If no limit is set, the assumption is that SQL *knows* that there
   269  		// is only a "small" amount of data to be read, and wants to preserve
   270  		// concurrency for this request inside of DistSender, which setting
   271  		// TargetBytes would interfere with.
   272  		ba.Header.TargetBytes = 10 * (1 << 20)
   273  	}
   274  	ba.Header.ReturnRangeInfo = f.returnRangeInfo
   275  	ba.Requests = make([]roachpb.RequestUnion, len(f.spans))
   276  	keyLocking := f.getKeyLockingStrength()
   277  	if f.reverse {
   278  		scans := make([]roachpb.ReverseScanRequest, len(f.spans))
   279  		for i := range f.spans {
   280  			scans[i].SetSpan(f.spans[i])
   281  			scans[i].ScanFormat = roachpb.BATCH_RESPONSE
   282  			scans[i].KeyLocking = keyLocking
   283  			ba.Requests[i].MustSetInner(&scans[i])
   284  		}
   285  	} else {
   286  		scans := make([]roachpb.ScanRequest, len(f.spans))
   287  		for i := range f.spans {
   288  			scans[i].SetSpan(f.spans[i])
   289  			scans[i].ScanFormat = roachpb.BATCH_RESPONSE
   290  			scans[i].KeyLocking = keyLocking
   291  			ba.Requests[i].MustSetInner(&scans[i])
   292  		}
   293  	}
   294  	if cap(f.requestSpans) < len(f.spans) {
   295  		f.requestSpans = make(roachpb.Spans, len(f.spans))
   296  	} else {
   297  		f.requestSpans = f.requestSpans[:len(f.spans)]
   298  	}
   299  	copy(f.requestSpans, f.spans)
   300  
   301  	if log.ExpensiveLogEnabled(ctx, 2) {
   302  		var buf bytes.Buffer
   303  		for i, span := range f.spans {
   304  			if i != 0 {
   305  				buf.WriteString(", ")
   306  			}
   307  			buf.WriteString(span.String())
   308  		}
   309  		log.VEventf(ctx, 2, "Scan %s", buf.String())
   310  	}
   311  
   312  	// Reset spans in preparation for adding resume-spans below.
   313  	f.spans = f.spans[:0]
   314  
   315  	br, err := f.sendFn(ctx, ba)
   316  	if err != nil {
   317  		return err
   318  	}
   319  	if br != nil {
   320  		f.responses = br.Responses
   321  	} else {
   322  		f.responses = nil
   323  	}
   324  
   325  	// Set end to true until disproved.
   326  	f.fetchEnd = true
   327  	var sawResumeSpan bool
   328  	for _, resp := range f.responses {
   329  		reply := resp.GetInner()
   330  		header := reply.Header()
   331  
   332  		if header.NumKeys > 0 && sawResumeSpan {
   333  			return errors.Errorf(
   334  				"span with results after resume span; it shouldn't happen given that "+
   335  					"we're only scanning non-overlapping spans. New spans: %s",
   336  				sqlbase.PrettySpans(nil, f.spans, 0 /* skip */))
   337  		}
   338  
   339  		if resumeSpan := header.ResumeSpan; resumeSpan != nil {
   340  			// A span needs to be resumed.
   341  			f.fetchEnd = false
   342  			f.spans = append(f.spans, *resumeSpan)
   343  			// Verify we don't receive results for any remaining spans.
   344  			sawResumeSpan = true
   345  		}
   346  
   347  		// Fill up the RangeInfos, in case we got any.
   348  		if f.returnRangeInfo {
   349  			for _, ri := range header.RangeInfos {
   350  				f.rangeInfos = roachpb.InsertRangeInfo(f.rangeInfos, ri)
   351  			}
   352  		}
   353  	}
   354  
   355  	f.batchIdx++
   356  
   357  	// TODO(radu): We should fetch the next chunk in the background instead of waiting for the next
   358  	// call to fetch(). We can use a pool of workers to issue the KV ops which will also limit the
   359  	// total number of fetches that happen in parallel (and thus the amount of resources we use).
   360  	return nil
   361  }
   362  
   363  // nextBatch returns the next batch of key/value pairs. If there are none
   364  // available, a fetch is initiated. When there are no more keys, ok is false.
   365  // origSpan returns the span that batch was fetched from, and bounds all of the
   366  // keys returned.
   367  func (f *txnKVFetcher) nextBatch(
   368  	ctx context.Context,
   369  ) (ok bool, kvs []roachpb.KeyValue, batchResponse []byte, origSpan roachpb.Span, err error) {
   370  	if len(f.remainingBatches) > 0 {
   371  		batch := f.remainingBatches[0]
   372  		f.remainingBatches = f.remainingBatches[1:]
   373  		return true, nil, batch, f.origSpan, nil
   374  	}
   375  	if len(f.responses) > 0 {
   376  		reply := f.responses[0].GetInner()
   377  		f.responses = f.responses[1:]
   378  		origSpan := f.requestSpans[0]
   379  		f.requestSpans = f.requestSpans[1:]
   380  		var batchResp []byte
   381  		switch t := reply.(type) {
   382  		case *roachpb.ScanResponse:
   383  			if len(t.BatchResponses) > 0 {
   384  				batchResp = t.BatchResponses[0]
   385  				f.remainingBatches = t.BatchResponses[1:]
   386  			}
   387  			return true, t.Rows, batchResp, origSpan, nil
   388  		case *roachpb.ReverseScanResponse:
   389  			if len(t.BatchResponses) > 0 {
   390  				batchResp = t.BatchResponses[0]
   391  				f.remainingBatches = t.BatchResponses[1:]
   392  			}
   393  			return true, t.Rows, batchResp, origSpan, nil
   394  		}
   395  	}
   396  	if f.fetchEnd {
   397  		return false, nil, nil, roachpb.Span{}, nil
   398  	}
   399  	if err := f.fetch(ctx); err != nil {
   400  		return false, nil, nil, roachpb.Span{}, err
   401  	}
   402  	return f.nextBatch(ctx)
   403  }