github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/enginepb/mvcc.proto (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // Cannot be proto3 because we depend on absent-vs-empty distinction.
    12  syntax = "proto2";
    13  package cockroach.storage.enginepb;
    14  option go_package = "enginepb";
    15  
    16  import "storage/enginepb/mvcc3.proto";
    17  import "util/hlc/legacy_timestamp.proto";
    18  import "gogoproto/gogo.proto";
    19  
    20  // MVCCMetadata holds MVCC metadata for a key. Used by storage/mvcc.go.
    21  // An MVCCMetadata is stored for a versioned key while there is an intent on
    22  // that key.
    23  message MVCCMetadata {
    24    option (gogoproto.goproto_stringer) = false;
    25    option (gogoproto.equal) = true;
    26    option (gogoproto.populate) = true;
    27  
    28    // The transaction metadata. Present for intents, but not for inline
    29    // values (e.g. timeseries data). Also not present for
    30    // "reconstructed" metadata that is used during MVCC processing when
    31    // no intent exists on disk.
    32    optional TxnMeta txn = 1;
    33    // The timestamp of the most recent versioned value if this is a
    34    // value that may have multiple versions. For values which may have
    35    // only one version, the data is stored inline (via raw_bytes), and
    36    // timestamp is set to zero.
    37    optional util.hlc.LegacyTimestamp timestamp = 2 [(gogoproto.nullable) = false];
    38    // Whether the most recent value is a deletion tombstone.
    39    optional bool deleted = 3 [(gogoproto.nullable) = false];
    40    // The size in bytes of the most recent encoded key.
    41    optional int64 key_bytes = 4 [(gogoproto.nullable) = false];
    42    // The size in bytes of the most recent versioned value.
    43    optional int64 val_bytes = 5 [(gogoproto.nullable) = false];
    44    // Inline value, used for non-versioned values with zero
    45    // timestamp. This provides an efficient short circuit of the normal
    46    // MVCC metadata sentinel and subsequent version rows. If timestamp
    47    // == (0, 0), then there is only a single MVCC metadata row with
    48    // value inlined, and with empty timestamp, key_bytes, and
    49    // val_bytes.
    50    optional bytes raw_bytes = 6;
    51    // SequencedIntent stores a value at a given key and the sequence number it was
    52    // written at; it is stored in the IntentHistory of a key during a transaction.
    53    message SequencedIntent {
    54      option (gogoproto.goproto_stringer) = false;
    55      option (gogoproto.equal) = true;
    56      option (gogoproto.populate) = true;
    57      // Sequence is a one-indexed number which is increased on each request
    58      // set as part of a transaction. It uniquely identifies a value from
    59      // the IntentHistory.
    60      optional int32 sequence = 1 [(gogoproto.nullable) = false, (gogoproto.casttype) = "TxnSeq"];
    61      // Value is the value written to the key as part of the transaction at
    62      // the above Sequence.
    63      optional bytes value = 2;
    64    }
    65  
    66    // IntentHistory of the transaction stores the older values the txn wrote
    67    // for the key along with each value's corresponding Sequence. It doesn't
    68    // contain the latest intent value but rather stores all the values that have
    69    // been overwritten by the transaction.
    70    // IntentHistory will be empty for non-transactional requests.
    71    repeated SequencedIntent intent_history = 8 [(gogoproto.nullable) = false];
    72    // merge_timestamp provides a measure of protection against replays caused
    73    // by Raft duplicating merge commands.
    74    optional util.hlc.LegacyTimestamp merge_timestamp = 7;
    75  }
    76  
    77  // A mirror of MVCCMetadata intended for serializing non-MVCC data that is
    78  // merged within the RocksDB or Pebble engines. Such data only populates
    79  // raw_bytes and optionally merge_timestamp. The C++ serialization of
    80  // MVCCMetadata does not serialize any of the missing optional fields, but
    81  // the Go serialization treats the optional fields annotated with
    82  // [(gogoproto.nullable) = false] in a manner that cannot distinguish
    83  // between the default and missing value, and causes them to be serialized
    84  // (e.g. fields with tag 2, 3, 4, 5). By using the following proto in the
    85  // Go merge code, the Go and C++ serialization match.
    86  message MVCCMetadataSubsetForMergeSerialization {
    87    option (gogoproto.goproto_stringer) = false;
    88    option (gogoproto.equal) = true;
    89    option (gogoproto.populate) = true;
    90  
    91    optional bytes raw_bytes = 6;  // Same name, type, and tag as MVCCMetadata.raw_bytes.
    92    optional util.hlc.LegacyTimestamp merge_timestamp = 7;  // Same name, type, and tag as MVCCMetadata.merge_timestamp.
    93  }
    94  
    95  // MVCCStats tracks byte and instance counts for various groups of keys,
    96  // values, or key-value pairs; see the field comments for details.
    97  //
    98  // It also tracks two cumulative ages, namely that of intents and non-live
    99  // (i.e. GC-able) bytes. This computation is intrinsically linked to
   100  // last_update_nanos and is easy to get wrong. Updates happen only once every
   101  // full second, as measured by last_update_nanos/1e9. That is, forward updates
   102  // don't change last_update_nanos until an update at a timestamp which,
   103  // truncated to the second, is ahead of last_update_nanos/1e9. Then, that
   104  // difference in seconds times the base quantity (excluding the currently
   105  // running update) is added to the age.
   106  //
   107  // To give an example, if an intent is around from `t=2.5s` to `t=4.1s` (the
   108  // current time), then it contributes an intent age of two seconds (one second
   109  // picked up when crossing `t=3s`, another one at `t=4s`). Similarly, if a
   110  // GC'able kv pair is around for this amount of time, it contributes two seconds
   111  // times its size in bytes.
   112  //
   113  // It gets more complicated when data is accounted for with a timestamp
   114  // behind last_update_nanos. In this case, if
   115  // more than a second has passed (computed via truncation above), the ages have
   116  // to be adjusted to account for this late addition. This isn't hard: add the
   117  // new data's base quantity times the (truncated) number of seconds behind.
   118  // Important to keep in mind with those computations is that (x/1e9 - y/1e9)
   119  // does not equal (x-y)/1e9 in most cases.
   120  //
   121  // Note that this struct must be kept at a fixed size by using fixed-size
   122  // encodings for all fields and by making all fields non-nullable. This is
   123  // so that it can predict its own impact on the size of the system-local
   124  // kv-pairs. NOTE(review): contains_estimates is int64 (a varint) - confirm.
   125  message MVCCStats {
   126    option (gogoproto.equal) = true;
   127    option (gogoproto.populate) = true;
   128  
   129    // contains_estimates indicates that the MVCCStats object contains values
   130    // which have been estimated. This means that the stats should not be used
   131    // where complete accuracy is required, and instead should be recomputed
   132    // when necessary. See clusterversion.VersionContainsEstimatesCounter for
   133    // details about the migration from bool to int64.
   134    optional int64 contains_estimates = 14 [(gogoproto.nullable) = false];
   135  
   136    // last_update_nanos is a timestamp at which the ages were last
   137    // updated. See the comment on MVCCStats.
   138    optional sfixed64 last_update_nanos = 1 [(gogoproto.nullable) = false];
   139    // intent_age is the cumulative age of the tracked intents.
   140    // See the comment on MVCCStats.
   141    optional sfixed64 intent_age = 2 [(gogoproto.nullable) = false];
   142    // gc_bytes_age is the cumulative age of the non-live data (i.e.
   143    // data included in key_bytes and val_bytes, but not live_bytes).
   144    // See the comment on MVCCStats.
   145    optional sfixed64 gc_bytes_age = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "GCBytesAge"];
   146    // live_bytes is the number of bytes stored in keys and values which can in
   147    // principle be read by means of a Scan or Get in the far future, including
   148    // intents but not deletion tombstones (or their intents). Note that the
   149    // size of the meta kv pair (which could be explicit or implicit) is
   150    // included in this. Only the meta kv pair counts for the actual length of
   151    // the encoded key (regular pairs only count the timestamp suffix).
   152    optional sfixed64 live_bytes = 4 [(gogoproto.nullable) = false];
   153    // live_count is the number of meta keys tracked under live_bytes.
   154    optional sfixed64 live_count = 5 [(gogoproto.nullable) = false];
   155    // key_bytes is the number of bytes stored in all non-system
   156    // keys, including live, meta, old, and deleted keys.
   157    // Only meta keys really account for the "full" key; value
   158    // keys only for the timestamp suffix.
   159    optional sfixed64 key_bytes = 6 [(gogoproto.nullable) = false];
   160    // key_count is the number of meta keys tracked under key_bytes.
   161    optional sfixed64 key_count = 7 [(gogoproto.nullable) = false];
   162    // val_bytes is the number of bytes in all non-system version
   163    // values, including meta values.
   164    optional sfixed64 val_bytes = 8 [(gogoproto.nullable) = false];
   165    // val_count is the number of meta values tracked under val_bytes.
   166    optional sfixed64 val_count = 9 [(gogoproto.nullable) = false];
   167    // intent_bytes is the number of bytes in intent key-value
   168    // pairs (without their meta keys).
   169    optional sfixed64 intent_bytes = 10 [(gogoproto.nullable) = false];
   170    // intent_count is the number of keys tracked under intent_bytes.
   171    // It is equal to the number of meta keys in the system with
   172    // a non-empty Transaction proto.
   173    optional sfixed64 intent_count = 11 [(gogoproto.nullable) = false];
   174  
   175    // sys_bytes is the number of bytes stored in system-local kv-pairs.
   176    // This tracks the same quantity as (key_bytes + val_bytes), but
   177    // for system-local metadata keys (which aren't counted in either
   178    // key_bytes or val_bytes). Each of the keys falling into this group
   179    // is documented in keys/constants.go under the localPrefix constant
   180    // and is prefixed by either LocalRangeIDPrefix or LocalRangePrefix.
   181    optional sfixed64 sys_bytes = 12 [(gogoproto.nullable) = false];
   182    // sys_count is the number of meta keys tracked under sys_bytes.
   183    optional sfixed64 sys_count = 13 [(gogoproto.nullable) = false];
   184  }