github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/enginepb/mvcc.go (about)

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package enginepb
    12  
    13  import (
    14  	"fmt"
    15  	"io"
    16  	"math"
    17  	"sort"
    18  	"strings"
    19  
    20  	"github.com/cockroachdb/errors"
    21  )
    22  
    // TxnEpoch is a zero-indexed epoch for a transaction. When a transaction
    // retries, it increments its epoch, invalidating all of its previous writes.
    //
    // The underlying representation is a signed 32-bit integer.
    type TxnEpoch int32
    26  
    // TxnSeq is a zero-indexed sequence number assigned to a request performed by a
    // transaction. Writes within a transaction have unique sequences and start at
    // sequence number 1. Reads within a transaction have non-unique sequences and
    // start at sequence number 0.
    //
    // Writes within a transaction logically take place in sequence number order.
    // Reads within a transaction observe only writes performed by the transaction
    // at equal or lower sequence numbers.
    //
    // The underlying representation is a signed 32-bit integer, so values are
    // directly comparable with the usual ordering operators.
    type TxnSeq int32
    36  
    // TxnPriority defines the priority that a transaction operates at. Transactions
    // with high priorities are preferred over transactions with low priorities when
    // resolving conflicts between themselves. For example, transaction priorities
    // are used to determine which transaction to abort when resolving transaction
    // deadlocks.
    //
    // The underlying representation is a signed 32-bit integer.
    type TxnPriority int32
    43  
    44  const (
    45  	// MinTxnPriority is the minimum allowed txn priority.
    46  	MinTxnPriority TxnPriority = 0
    47  	// MaxTxnPriority is the maximum allowed txn priority.
    48  	MaxTxnPriority TxnPriority = math.MaxInt32
    49  )
    50  
    51  // TxnSeqIsIgnored returns true iff the sequence number overlaps with
    52  // any range in the ignored array.
    53  func TxnSeqIsIgnored(seq TxnSeq, ignored []IgnoredSeqNumRange) bool {
    54  	// The ignored seqnum ranges are guaranteed to be
    55  	// non-overlapping, non-contiguous, and guaranteed to be
    56  	// sorted in seqnum order. We're going to look from the end to
    57  	// see if the current intent seqnum is ignored.
    58  	for i := len(ignored) - 1; i >= 0; i-- {
    59  		if seq < ignored[i].Start {
    60  			// The history entry's sequence number is lower/older than
    61  			// the current ignored range. Go to the previous range
    62  			// and try again.
    63  			continue
    64  		}
    65  
    66  		// Here we have a range where the start seqnum is lower than the current
    67  		// intent seqnum. Does it include it?
    68  		if seq > ignored[i].End {
    69  			// Here we have a range where the current history entry's seqnum
    70  			// is higher than the range's end seqnum. Given that the
    71  			// ranges are sorted, we're guaranteed that there won't
    72  			// be any further overlapping range at a lower value of i.
    73  			return false
    74  		}
    75  		// Yes, it's included. We're going to skip over this
    76  		// intent seqnum and retry the search above.
    77  		return true
    78  	}
    79  
    80  	// Exhausted the ignore list. Not ignored.
    81  	return false
    82  }
    83  
    84  // Short returns a prefix of the transaction's ID.
    85  func (t TxnMeta) Short() string {
    86  	return t.ID.Short()
    87  }
    88  
    89  // Total returns the range size as the sum of the key and value
    90  // bytes. This includes all non-live keys and all versioned values.
    91  func (ms MVCCStats) Total() int64 {
    92  	return ms.KeyBytes + ms.ValBytes
    93  }
    94  
    95  // GCBytes is a convenience function which returns the number of gc bytes,
    96  // that is the key and value bytes excluding the live bytes.
    97  func (ms MVCCStats) GCBytes() int64 {
    98  	return ms.KeyBytes + ms.ValBytes - ms.LiveBytes
    99  }
   100  
   101  // AvgIntentAge returns the average age of outstanding intents,
   102  // based on current wall time specified via nowNanos.
   103  func (ms MVCCStats) AvgIntentAge(nowNanos int64) float64 {
   104  	if ms.IntentCount == 0 {
   105  		return 0
   106  	}
   107  	// Advance age by any elapsed time since last computed. Note that
   108  	// we operate on a copy.
   109  	ms.AgeTo(nowNanos)
   110  	return float64(ms.IntentAge) / float64(ms.IntentCount)
   111  }
   112  
   113  // GCByteAge returns the total age of outstanding gc'able
   114  // bytes, based on current wall time specified via nowNanos.
   115  // nowNanos is ignored if it's a past timestamp as seen by
   116  // rs.LastUpdateNanos.
   117  func (ms MVCCStats) GCByteAge(nowNanos int64) int64 {
   118  	ms.AgeTo(nowNanos) // we operate on a copy
   119  	return ms.GCBytesAge
   120  }
   121  
   122  // Forward is like AgeTo, but if nowNanos is not ahead of ms.LastUpdateNanos,
   123  // this method is a noop.
   124  func (ms *MVCCStats) Forward(nowNanos int64) {
   125  	if ms.LastUpdateNanos >= nowNanos {
   126  		return
   127  	}
   128  	ms.AgeTo(nowNanos)
   129  }
   130  
   131  // AgeTo encapsulates the complexity of computing the increment in age
   132  // quantities contained in MVCCStats. Two MVCCStats structs only add and
   133  // subtract meaningfully if their LastUpdateNanos matches, so aging them to
   134  // the max of their LastUpdateNanos is a prerequisite, though Add() takes
   135  // care of this internally.
   136  func (ms *MVCCStats) AgeTo(nowNanos int64) {
   137  	// Seconds are counted every time each individual nanosecond timestamp
   138  	// crosses a whole second boundary (i.e. is zero mod 1E9). Thus it would
   139  	// be a mistake to use the (nonequivalent) expression (a-b)/1E9.
   140  	diffSeconds := nowNanos/1e9 - ms.LastUpdateNanos/1e9
   141  
   142  	ms.GCBytesAge += ms.GCBytes() * diffSeconds
   143  	ms.IntentAge += ms.IntentCount * diffSeconds
   144  	ms.LastUpdateNanos = nowNanos
   145  }
   146  
   147  // Add adds values from oms to ms. The ages will be moved forward to the
   148  // larger of the LastUpdateNano timestamps involved.
   149  func (ms *MVCCStats) Add(oms MVCCStats) {
   150  	// Enforce the max LastUpdateNanos for both ages based on their
   151  	// pre-addition state.
   152  	ms.Forward(oms.LastUpdateNanos)
   153  	oms.Forward(ms.LastUpdateNanos) // on local copy
   154  
   155  	ms.ContainsEstimates += oms.ContainsEstimates
   156  
   157  	// Now that we've done that, we may just add them.
   158  	ms.IntentAge += oms.IntentAge
   159  	ms.GCBytesAge += oms.GCBytesAge
   160  	ms.LiveBytes += oms.LiveBytes
   161  	ms.KeyBytes += oms.KeyBytes
   162  	ms.ValBytes += oms.ValBytes
   163  	ms.IntentBytes += oms.IntentBytes
   164  	ms.LiveCount += oms.LiveCount
   165  	ms.KeyCount += oms.KeyCount
   166  	ms.ValCount += oms.ValCount
   167  	ms.IntentCount += oms.IntentCount
   168  	ms.SysBytes += oms.SysBytes
   169  	ms.SysCount += oms.SysCount
   170  }
   171  
   172  // Subtract removes oms from ms. The ages will be moved forward to the larger of
   173  // the LastUpdateNano timestamps involved.
   174  func (ms *MVCCStats) Subtract(oms MVCCStats) {
   175  	// Enforce the max LastUpdateNanos for both ages based on their
   176  	// pre-subtraction state.
   177  	ms.Forward(oms.LastUpdateNanos)
   178  	oms.Forward(ms.LastUpdateNanos)
   179  
   180  	ms.ContainsEstimates -= oms.ContainsEstimates
   181  
   182  	// Now that we've done that, we may subtract.
   183  	ms.IntentAge -= oms.IntentAge
   184  	ms.GCBytesAge -= oms.GCBytesAge
   185  	ms.LiveBytes -= oms.LiveBytes
   186  	ms.KeyBytes -= oms.KeyBytes
   187  	ms.ValBytes -= oms.ValBytes
   188  	ms.IntentBytes -= oms.IntentBytes
   189  	ms.LiveCount -= oms.LiveCount
   190  	ms.KeyCount -= oms.KeyCount
   191  	ms.ValCount -= oms.ValCount
   192  	ms.IntentCount -= oms.IntentCount
   193  	ms.SysBytes -= oms.SysBytes
   194  	ms.SysCount -= oms.SysCount
   195  }
   196  
   197  // IsInline returns true if the value is inlined in the metadata.
   198  func (meta MVCCMetadata) IsInline() bool {
   199  	return meta.RawBytes != nil
   200  }
   201  
   202  // AddToIntentHistory adds the sequence and value to the intent history.
   203  func (meta *MVCCMetadata) AddToIntentHistory(seq TxnSeq, val []byte) {
   204  	meta.IntentHistory = append(meta.IntentHistory,
   205  		MVCCMetadata_SequencedIntent{Sequence: seq, Value: val})
   206  }
   207  
   208  // GetPrevIntentSeq goes through the intent history and finds the previous
   209  // intent's sequence number given the current sequence.
   210  func (meta *MVCCMetadata) GetPrevIntentSeq(
   211  	seq TxnSeq, ignored []IgnoredSeqNumRange,
   212  ) (MVCCMetadata_SequencedIntent, bool) {
   213  	end := len(meta.IntentHistory)
   214  	found := 0
   215  	for {
   216  		index := sort.Search(end, func(i int) bool {
   217  			return meta.IntentHistory[i].Sequence >= seq
   218  		})
   219  		if index == 0 {
   220  			// It is possible that no intent exists such that the sequence is less
   221  			// than the read sequence. In this case, we cannot read a value from the
   222  			// intent history.
   223  			return MVCCMetadata_SequencedIntent{}, false
   224  		}
   225  		candidate := index - 1
   226  		if TxnSeqIsIgnored(meta.IntentHistory[candidate].Sequence, ignored) {
   227  			// This entry was part of an ignored range. Skip it and
   228  			// try the search again, using the current position as new
   229  			// upper bound.
   230  			end = candidate
   231  			continue
   232  		}
   233  		// This history entry has not been ignored, so we're going to keep it.
   234  		found = candidate
   235  		break
   236  	}
   237  	return meta.IntentHistory[found], true
   238  }
   239  
   240  // GetIntentValue goes through the intent history and finds the value
   241  // written at the sequence number.
   242  func (meta *MVCCMetadata) GetIntentValue(seq TxnSeq) ([]byte, bool) {
   243  	index := sort.Search(len(meta.IntentHistory), func(i int) bool {
   244  		return meta.IntentHistory[i].Sequence >= seq
   245  	})
   246  	if index < len(meta.IntentHistory) && meta.IntentHistory[index].Sequence == seq {
   247  		return meta.IntentHistory[index].Value, true
   248  	}
   249  	return nil, false
   250  }
   251  
   252  // String implements the fmt.Stringer interface.
   253  func (m *MVCCMetadata_SequencedIntent) String() string {
   254  	var buf strings.Builder
   255  	m.FormatW(&buf, false /* expand */)
   256  	return buf.String()
   257  }
   258  
   259  // Format implements the fmt.Formatter interface.
   260  func (m *MVCCMetadata_SequencedIntent) Format(f fmt.State, r rune) {
   261  	m.FormatW(f, f.Flag('+'))
   262  }
   263  
   264  // FormatW enables grouping formatters around a single buffer while
   265  // avoiding copies.
   266  func (m *MVCCMetadata_SequencedIntent) FormatW(buf io.Writer, expand bool) {
   267  	fmt.Fprintf(buf,
   268  		"{%d %s}",
   269  		m.Sequence,
   270  		FormatBytesAsValue(m.Value))
   271  }
   272  
   273  // String implements the fmt.Stringer interface.
   274  func (meta *MVCCMetadata) String() string {
   275  	var buf strings.Builder
   276  	meta.FormatW(&buf, false /* expand */)
   277  	return buf.String()
   278  }
   279  
   280  // Format implements the fmt.Formatter interface.
   281  func (meta *MVCCMetadata) Format(f fmt.State, r rune) {
   282  	meta.FormatW(f, f.Flag('+'))
   283  }
   284  
   285  // FormatW enables grouping formatters around a single buffer while
   286  // avoiding copies.
   287  func (meta *MVCCMetadata) FormatW(buf io.Writer, expand bool) {
   288  	fmt.Fprintf(buf, "txn={%s} ts=%s del=%t klen=%d vlen=%d",
   289  		meta.Txn,
   290  		meta.Timestamp,
   291  		meta.Deleted,
   292  		meta.KeyBytes,
   293  		meta.ValBytes,
   294  	)
   295  	if len(meta.RawBytes) > 0 {
   296  		if expand {
   297  			fmt.Fprintf(buf, " raw=%s", FormatBytesAsValue(meta.RawBytes))
   298  		} else {
   299  			fmt.Fprintf(buf, " rawlen=%d", len(meta.RawBytes))
   300  		}
   301  	}
   302  	if nih := len(meta.IntentHistory); nih > 0 {
   303  		if expand {
   304  			fmt.Fprint(buf, " ih={")
   305  			for i := range meta.IntentHistory {
   306  				meta.IntentHistory[i].FormatW(buf, expand)
   307  			}
   308  			fmt.Fprint(buf, "}")
   309  		} else {
   310  			fmt.Fprintf(buf, " nih=%d", nih)
   311  		}
   312  	}
   313  }
   314  
   315  func (meta *MVCCMetadataSubsetForMergeSerialization) String() string {
   316  	var m MVCCMetadata
   317  	m.RawBytes = meta.RawBytes
   318  	m.MergeTimestamp = meta.MergeTimestamp
   319  	return m.String()
   320  }
   321  
   322  // SafeMessage implements the SafeMessager interface.
   323  //
   324  // This method should be kept largely synchronized with String(), except that it
   325  // can't include sensitive info (e.g. the transaction key).
   326  func (meta *MVCCMetadata) SafeMessage() string {
   327  	var buf strings.Builder
   328  	fmt.Fprintf(&buf, "{%s} ts=%s del=%t klen=%d vlen=%d",
   329  		meta.Txn.SafeMessage(),
   330  		meta.Timestamp,
   331  		meta.Deleted,
   332  		meta.KeyBytes,
   333  		meta.ValBytes,
   334  	)
   335  	if len(meta.RawBytes) > 0 {
   336  		fmt.Fprintf(&buf, " rawlen=%d", len(meta.RawBytes))
   337  	}
   338  	if nih := len(meta.IntentHistory); nih > 0 {
   339  		fmt.Fprintf(&buf, " nih=%d", nih)
   340  	}
   341  	return buf.String()
   342  }
   343  
   344  // String implements the fmt.Stringer interface.
   345  // We implement by value as the object may not reside on the heap.
   346  func (t TxnMeta) String() string {
   347  	var buf strings.Builder
   348  	t.FormatW(&buf)
   349  	return buf.String()
   350  }
   351  
   352  // FormatW enables grouping formatters around a single buffer while
   353  // avoiding copies.
   354  // We implement by value as the object may not reside on the heap.
   355  func (t TxnMeta) FormatW(buf io.Writer) {
   356  	// Compute priority as a floating point number from 0-100 for readability.
   357  	floatPri := 100 * float64(t.Priority) / float64(math.MaxInt32)
   358  	fmt.Fprintf(buf,
   359  		"id=%s key=%s pri=%.8f epo=%d ts=%s min=%s seq=%d",
   360  		t.Short(),
   361  		FormatBytesAsKey(t.Key),
   362  		floatPri,
   363  		t.Epoch,
   364  		t.WriteTimestamp,
   365  		t.MinTimestamp,
   366  		t.Sequence)
   367  }
   368  
   369  // SafeMessage implements the SafeMessager interface.
   370  //
   371  // This method should be kept largely synchronized with String(), except that it
   372  // can't include sensitive info (e.g. the transaction key).
   373  //
   374  // We implement by value as the object may not reside on the heap.
   375  func (t TxnMeta) SafeMessage() string {
   376  	var buf strings.Builder
   377  	// Compute priority as a floating point number from 0-100 for readability.
   378  	floatPri := 100 * float64(t.Priority) / float64(math.MaxInt32)
   379  	fmt.Fprintf(&buf,
   380  		"id=%s pri=%.8f epo=%d ts=%s min=%s seq=%d",
   381  		t.Short(),
   382  		floatPri,
   383  		t.Epoch,
   384  		t.WriteTimestamp,
   385  		t.MinTimestamp,
   386  		t.Sequence)
   387  	return buf.String()
   388  }
   389  
   // Compile-time assertion that *TxnMeta satisfies errors.SafeMessager.
   var _ errors.SafeMessager = (*TxnMeta)(nil)
   391  
   // FormatBytesAsKey renders key bytes for display. It is a package-level
   // variable so that it can be replaced: it is injected by module roachpb as a
   // dependency upon initialization. The default defined here converts the bytes
   // to a string unchanged.
   var FormatBytesAsKey = func(k []byte) string { return string(k) }
   396  
   // FormatBytesAsValue renders value bytes for display. It is a package-level
   // variable so that it can be replaced: it is injected by module roachpb as a
   // dependency upon initialization. The default defined here converts the bytes
   // to a string unchanged.
   var FormatBytesAsValue = func(v []byte) string { return string(v) }