github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/batcheval/split_stats_helper.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package batcheval
    12  
    13  import "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    14  
    15  // splitStatsHelper codifies and explains the stats computations related to a
    16  // split. The quantities known during a split (i.e. while the split trigger
    17  // is evaluating) are
    18  //
    19  // - AbsPreSplitBothEstimated: the stats of the range before the split trigger,
    20  //   i.e. without accounting for any writes in the batch. This can have
    21  //   ContainsEstimates set.
    22  // - DeltaBatchEstimated: the writes in the batch, i.e. the stats delta accrued
    23  //   from the evaluation of the EndTxn so far (this is mostly the write to the
    24  //   transaction record, as well as resolving the intent on the range descriptor,
    25  //   but nothing in this code relies on that). Since we have no reason to
    26  //   introduce ContainsEstimates in a split trigger, this typically has
    27  //   ContainsEstimates unset, but the results will be estimate free either way.
    28  // - AbsPostSplitLeft: the stats of the range after applying the split, i.e.
    29  //   accounting both for the shrinking as well as for the writes in DeltaBatch
    30  //   related to the shrunk keyrange.
    31  //   In practice, we obtain this by recomputing the stats, and so we don't
    32  //   expect ContainsEstimates to be set in them.
    33  //
    34  // We are interested in computing from this the quantities
    35  //
    36  // - AbsPostSplitRight(): the stats of the right hand side created by the split,
    37  //   i.e. the data taken over from the left hand side plus whatever was written to
    38  //   the right hand side in the process (metadata etc). We can recompute this, but
    39  //   try to avoid it unless necessary (when CombinedErrorDelta below is nonzero).
    40  // - DeltaPostSplitLeft(): the stats delta that should be emitted by the split
    41  //   trigger itself, i.e. the data which the left hand side (initially comprising
    42  //   both halves) loses by moving data into the right hand side (including whatever
    43  //   DeltaBatch contained in contributions attributable to the keyspace on the
    44  //   left).
    45  // - CombinedErrorDelta: the difference between (AbsPreSplitBoth+DeltaBatch) and
    46  //   the recomputation of the pre-split range including the batch. This is zero if
    47  //   neither of the inputs contains estimates. If it's not zero, we need to
    48  //   recompute from scratch to obtain AbsPostSplitRight. What's interesting about
    49  //   this quantity is that we never care what exactly it is, but we do care
    50  //   whether it's zero or not because if it's zero we get to do less work.
    51  //
    52  // Moreover, we want neither of AbsPostSplit{Right,Left} to end up with
    53  // estimates. The way splits are set up right now, we sort of get this "for
    54  // free" for the left hand side (since we recompute that unconditionally; there
    55  // is a guarantee that the left hand side is never too large). We also don't
    56  // want to create new ranges that start out with estimates (just to prevent the
    57  // unbounded proliferation of estimates).
    58  //
    59  // The two unknown quantities can be expressed in terms of the known quantities
    60  // because
    61  //
    62  // (1) AbsPreSplitBoth + DeltaBatch
    63  // 	                   - CombinedErrorDelta = AbsPostSplitLeft + AbsPostSplitRight
    64  //
    65  // In words, this corresponds to "all bytes are accounted for": from the initial
    66  // stats that we have (accounting for the fact that AbsPreSplitBoth+DeltaBatch
    67  // may contain estimates), everything we add/remove during the split ends up
    68  // tracked either on the left or on the right, and nothing is created out of
    69  // thin air.
    70  //
    71  // (2) AbsPreSplitBoth + DeltaPostSplitLeft() = AbsPostSplitLeft
    72  //
    73  // This expresses the fact that is always true whenever a command applies on a
    74  // range without introducing an estimate: the stats before the command plus the
    75  // delta emitted by the command equal the stats after the command. In this case,
    76  // the stats before the command are that of the range before the split (remember
    77  // that the split shrinks the range towards the start key, i.e. the left hand
    78  // side is the same range as the pre-split one).
    79  //
    80  // These two equations are easily solved for the unknowns. First, we can express
    81  // DeltaPostSplitLeft() in known quantities via (2) as
    82  //
    83  //     DeltaPostSplitLeft() = AbsPostSplitLeft - AbsPreSplitBothEstimated.
    84  //
    85  // Note that if we start out with estimates, DeltaPostSplitLeft() will wipe out
    86  // those estimates when added to the absolute stats.
    87  //
    88  // For AbsPostSplitRight(), there are two cases. First, due to the identity
    89  //
    90  //     CombinedErrorDelta =   AbsPreSplitBothEstimated + DeltaBatchEstimated
    91  //                          -(AbsPostSplitLeft + AbsPostSplitRight)
    92  //
    93  // and the fact that the second line contains no estimates, we know that
    94  // CombinedErrorDelta is zero if the first line contains no estimates. Using
    95  // this, we can rearrange as
    96  //
    97  //     AbsPostSplitRight() = AbsPreSplitBoth + DeltaBatch - AbsPostSplitLeft.
    98  //
    99  // where all quantities on the right are known. If CombinedErrorDelta is
   100  // nonzero, we effectively have one more unknown in our linear system and we
   101  // need to recompute AbsPostSplitRight from scratch. (As fallout, we can in
   102  // principle compute CombinedErrorDelta, but we don't care).
   103  type splitStatsHelper struct {
   104  	in splitStatsHelperInput
   105  
   106  	absPostSplitRight *enginepb.MVCCStats
   107  }
   108  
   109  // splitStatsHelperInput is passed to makeSplitStatsHelper.
   110  type splitStatsHelperInput struct {
   111  	AbsPreSplitBothEstimated enginepb.MVCCStats
   112  	DeltaBatchEstimated      enginepb.MVCCStats
   113  	AbsPostSplitLeft         enginepb.MVCCStats
   114  	// AbsPostSplitRightFn returns the stats for the right hand side of the
   115  	// split. This is only called (and only once) when either of the first two
   116  	// fields above contains estimates, so that we can guarantee that the
   117  	// post-splits stats don't.
   118  	AbsPostSplitRightFn func() (enginepb.MVCCStats, error)
   119  }
   120  
   121  // makeSplitStatsHelper initializes a splitStatsHelper. The values in the input
   122  // are assumed to not change outside of the helper and must no longer be used.
   123  // The provided AbsPostSplitRightFn recomputes the right hand side of the split
   124  // after accounting for the split trigger batch. This is only invoked at most
   125  // once, and only when necessary.
   126  func makeSplitStatsHelper(input splitStatsHelperInput) (splitStatsHelper, error) {
   127  	h := splitStatsHelper{
   128  		in: input,
   129  	}
   130  
   131  	if h.in.AbsPreSplitBothEstimated.ContainsEstimates == 0 &&
   132  		h.in.DeltaBatchEstimated.ContainsEstimates == 0 {
   133  		// We have CombinedErrorDelta zero, so use arithmetic to compute
   134  		// AbsPostSplitRight().
   135  		ms := h.in.AbsPreSplitBothEstimated
   136  		ms.Subtract(h.in.AbsPostSplitLeft)
   137  		ms.Add(h.in.DeltaBatchEstimated)
   138  		h.absPostSplitRight = &ms
   139  		return h, nil
   140  	}
   141  	// Estimates are contained in the input, so ask the oracle for
   142  	// AbsPostSplitRight().
   143  	ms, err := input.AbsPostSplitRightFn()
   144  	if err != nil {
   145  		return splitStatsHelper{}, err
   146  	}
   147  	h.absPostSplitRight = &ms
   148  	return h, nil
   149  }
   150  
   151  // AbsPostSplitRight returns the stats of the right hand side created by the
   152  // split. The result is returned as a pointer because the caller can freely
   153  // modify it, assuming they're adding only stats corresponding to mutations that
   154  // they know only affect the right hand side. (If estimates are introduced in
   155  // the process, the right hand side will start out with estimates). Implicitly
   156  // this changes the DeltaBatchEstimated supplied to makeSplitStatsHelper, but
   157  // the contract assumes that that value will no longer be used.
   158  func (h splitStatsHelper) AbsPostSplitRight() *enginepb.MVCCStats {
   159  	return h.absPostSplitRight
   160  }
   161  
   162  // DeltaPostSplitLeft return the stats delta to be emitted on the left hand side
   163  // as the result of the split. It accounts for the data moved to the right hand
   164  // side as well as any mutations to the left hand side carried out during the
   165  // split, and additionally removes any estimates present in the pre-split stats.
   166  func (h splitStatsHelper) DeltaPostSplitLeft() enginepb.MVCCStats {
   167  	// NB: if we ever wanted to also write to the left hand side after init'ing
   168  	// the helper, we can make that work, too.
   169  	// NB: note how none of this depends on mutations to absPostSplitRight.
   170  	ms := h.in.AbsPostSplitLeft
   171  	ms.Subtract(h.in.AbsPreSplitBothEstimated)
   172  
   173  	return ms
   174  }