// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Cannot be proto3 because we depend on absent-vs-empty distinction.
syntax = "proto2";
package cockroach.storage.enginepb;
option go_package = "enginepb";

import "storage/enginepb/mvcc3.proto";
import "util/hlc/legacy_timestamp.proto";
import "gogoproto/gogo.proto";

// MVCCMetadata holds MVCC metadata for a key. Used by storage/mvcc.go.
// An MVCCMetadata is stored for a versioned key while there is an intent on
// that key.
message MVCCMetadata {
  option (gogoproto.goproto_stringer) = false;
  option (gogoproto.equal) = true;
  option (gogoproto.populate) = true;

  // The transaction metadata. Present for intents, but not for inline
  // values (e.g. timeseries data). Also not present for
  // "reconstructed" metadata that is used during MVCC processing when
  // no intent exists on disk.
  optional TxnMeta txn = 1;
  // The timestamp of the most recent versioned value if this is a
  // value that may have multiple versions. For values which may have
  // only one version, the data is stored inline (via raw_bytes), and
  // timestamp is set to zero.
  optional util.hlc.LegacyTimestamp timestamp = 2 [(gogoproto.nullable) = false];
  // Is the most recent value a deletion tombstone?
  optional bool deleted = 3 [(gogoproto.nullable) = false];
  // The size in bytes of the most recent encoded key.
  optional int64 key_bytes = 4 [(gogoproto.nullable) = false];
  // The size in bytes of the most recent versioned value.
  optional int64 val_bytes = 5 [(gogoproto.nullable) = false];
  // Inline value, used for non-versioned values with zero
  // timestamp. This provides an efficient short circuit of the normal
  // MVCC metadata sentinel and subsequent version rows. If timestamp
  // == (0, 0), then there is only a single MVCC metadata row with
  // value inlined, and with empty timestamp, key_bytes, and
  // val_bytes.
  optional bytes raw_bytes = 6;
  // SequencedIntent stores a value at a given key and the sequence number it
  // was written at - to be stored in an IntentHistory of a key during a
  // transaction.
  message SequencedIntent {
    option (gogoproto.goproto_stringer) = false;
    option (gogoproto.equal) = true;
    option (gogoproto.populate) = true;
    // Sequence is a one-indexed number which is increased on each request
    // set as part of a transaction. It uniquely identifies a value from
    // the IntentHistory.
    optional int32 sequence = 1 [(gogoproto.nullable) = false, (gogoproto.casttype) = "TxnSeq"];
    // Value is the value written to the key as part of the transaction at
    // the above Sequence.
    optional bytes value = 2;
  }

  // IntentHistory of the transaction stores the older values the txn wrote
  // for the key along with each value's corresponding Sequence. It doesn't
  // contain the latest intent value but rather stores all the values that have
  // been overwritten by the transaction.
  // IntentHistory will be empty for non-transactional requests.
  //
  // Note: tag 8 is declared ahead of tag 7 below; declaration order does not
  // follow tag order here.
  repeated SequencedIntent intent_history = 8 [(gogoproto.nullable) = false];
  // This provides a measure of protection against replays caused by
  // Raft duplicating merge commands.
  optional util.hlc.LegacyTimestamp merge_timestamp = 7;
}

// A mirror of MVCCMetadata intended for serializing non-MVCC data that is
// merged within the RocksDB or Pebble engines. Such data only populates
// raw_bytes and optionally merge_timestamp.
// The C++ serialization of
// MVCCMetadata does not serialize any of the missing optional fields, but
// the Go serialization treats the optional fields annotated with
// [(gogoproto.nullable) = false] in a manner that cannot distinguish
// between the default and missing value, and causes them to be serialized
// (e.g. fields with tag 2, 3, 4, 5). By using the following proto in the
// Go merge code, the Go and C++ serialization match.
message MVCCMetadataSubsetForMergeSerialization {
  option (gogoproto.goproto_stringer) = false;
  option (gogoproto.equal) = true;
  option (gogoproto.populate) = true;

  // Same name, type, and tag (6) as MVCCMetadata.raw_bytes.
  optional bytes raw_bytes = 6;
  // Same name, type, and tag (7) as MVCCMetadata.merge_timestamp.
  optional util.hlc.LegacyTimestamp merge_timestamp = 7;
}

// MVCCStats tracks byte and instance counts for various groups of keys,
// values, or key-value pairs; see the field comments for details.
//
// It also tracks two cumulative ages, namely that of intents and non-live
// (i.e. GC-able) bytes. This computation is intrinsically linked to
// last_update_nanos and is easy to get wrong. Updates happen only once every
// full second, as measured by last_update_nanos/1e9. That is, forward updates
// don't change last_update_nanos until an update at a timestamp which,
// truncated to the second, is ahead of last_update_nanos/1e9. Then, that
// difference in seconds times the base quantity (excluding the currently
// running update) is added to the age.
//
// To give an example, if an intent is around from `t=2.5s` to `t=4.1s` (the
// current time), then it contributes an intent age of two seconds (one second
// picked up when crossing `t=3s`, another one at `t=4s`). Similarly, if a
// GC'able kv pair is around for this amount of time, it contributes two seconds
// times its size in bytes.
//
// It gets more complicated when data is
// accounted for with a timestamp behind last_update_nanos.
// In this case, if
// more than a second has passed (computed via truncation above), the ages have
// to be adjusted to account for this late addition. This isn't hard: add the
// new data's base quantity times the (truncated) number of seconds behind.
// Important to keep in mind with those computations is that (x/1e9 - y/1e9)
// does not equal (x-y)/1e9 in most cases.
//
// Note that this struct must be kept at a fixed size by using fixed-size
// encodings for all fields and by making all fields non-nullable. This is
// so that it can predict its own impact on the size of the system-local
// kv-pairs.
message MVCCStats {
  option (gogoproto.equal) = true;
  option (gogoproto.populate) = true;

  // contains_estimates indicates that the MVCCStats object contains values
  // which have been estimated. This means that the stats should not be used
  // where complete accuracy is required, and instead should be recomputed
  // when necessary. See clusterversion.VersionContainsEstimatesCounter for
  // details about the migration from bool to int64.
  //
  // NOTE(review): this field is declared as a varint int64 rather than
  // sfixed64, unlike every other field here and despite the fixed-size
  // requirement stated on the message. Presumably this follows from the
  // bool-to-int64 migration mentioned above - confirm before touching it,
  // since switching between varint and fixed64 is not wire-compatible.
  optional int64 contains_estimates = 14 [(gogoproto.nullable) = false];

  // last_update_nanos is a timestamp at which the ages were last
  // updated. See the comment on MVCCStats.
  optional sfixed64 last_update_nanos = 1 [(gogoproto.nullable) = false];
  // intent_age is the cumulative age of the tracked intents.
  // See the comment on MVCCStats.
  optional sfixed64 intent_age = 2 [(gogoproto.nullable) = false];
  // gc_bytes_age is the cumulative age of the non-live data (i.e.
  // data included in key_bytes and val_bytes, but not live_bytes).
  // See the comment on MVCCStats.
  optional sfixed64 gc_bytes_age = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "GCBytesAge"];
  // live_bytes is the number of bytes stored in keys and values which can in
  // principle be read by means of a Scan or Get in the far future, including
  // intents but not deletion tombstones (or their intents). Note that the
  // size of the meta kv pair (which could be explicit or implicit) is
  // included in this. Only the meta kv pair counts for the actual length of
  // the encoded key (regular pairs only count the timestamp suffix).
  optional sfixed64 live_bytes = 4 [(gogoproto.nullable) = false];
  // live_count is the number of meta keys tracked under live_bytes.
  optional sfixed64 live_count = 5 [(gogoproto.nullable) = false];
  // key_bytes is the number of bytes stored in all non-system
  // keys, including live, meta, old, and deleted keys.
  // Only meta keys really account for the "full" key; value
  // keys only for the timestamp suffix.
  optional sfixed64 key_bytes = 6 [(gogoproto.nullable) = false];
  // key_count is the number of meta keys tracked under key_bytes.
  optional sfixed64 key_count = 7 [(gogoproto.nullable) = false];
  // val_bytes is the number of bytes in all non-system version
  // values, including meta values.
  optional sfixed64 val_bytes = 8 [(gogoproto.nullable) = false];
  // val_count is the number of meta values tracked under val_bytes.
  optional sfixed64 val_count = 9 [(gogoproto.nullable) = false];
  // intent_bytes is the number of bytes in intent key-value
  // pairs (without their meta keys).
  optional sfixed64 intent_bytes = 10 [(gogoproto.nullable) = false];
  // intent_count is the number of keys tracked under intent_bytes.
  // It is equal to the number of meta keys in the system with
  // a non-empty Transaction proto.
  optional sfixed64 intent_count = 11 [(gogoproto.nullable) = false];

  // sys_bytes is the number of bytes stored in system-local kv-pairs.
  // This tracks the same quantity as (key_bytes + val_bytes), but
  // for system-local metadata keys (which aren't counted in either
  // key_bytes or val_bytes). Each of the keys falling into this group
  // is documented in keys/constants.go under the localPrefix constant
  // and is prefixed by either LocalRangeIDPrefix or LocalRangePrefix.
  optional sfixed64 sys_bytes = 12 [(gogoproto.nullable) = false];
  // sys_count is the number of meta keys tracked under sys_bytes.
  optional sfixed64 sys_count = 13 [(gogoproto.nullable) = false];
}