github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/enginepb/mvcc.go (about) 1 // Copyright 2015 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package enginepb 12 13 import ( 14 "fmt" 15 "io" 16 "math" 17 "sort" 18 "strings" 19 20 "github.com/cockroachdb/errors" 21 ) 22 23 // TxnEpoch is a zero-indexed epoch for a transaction. When a transaction 24 // retries, it increments its epoch, invalidating all of its previous writes. 25 type TxnEpoch int32 26 27 // TxnSeq is a zero-indexed sequence number assigned to a request performed by a 28 // transaction. Writes within a transaction have unique sequences and start at 29 // sequence number 1. Reads within a transaction have non-unique sequences and 30 // start at sequence number 0. 31 // 32 // Writes within a transaction logically take place in sequence number order. 33 // Reads within a transaction observe only writes performed by the transaction 34 // at equal or lower sequence numbers. 35 type TxnSeq int32 36 37 // TxnPriority defines the priority that a transaction operates at. Transactions 38 // with high priorities are preferred over transaction with low priorities when 39 // resolving conflicts between themselves. For example, transaction priorities 40 // are used to determine which transaction to abort when resolving transaction 41 // deadlocks. 42 type TxnPriority int32 43 44 const ( 45 // MinTxnPriority is the minimum allowed txn priority. 46 MinTxnPriority TxnPriority = 0 47 // MaxTxnPriority is the maximum allowed txn priority. 48 MaxTxnPriority TxnPriority = math.MaxInt32 49 ) 50 51 // TxnSeqIsIgnored returns true iff the sequence number overlaps with 52 // any range in the ignored array. 53 func TxnSeqIsIgnored(seq TxnSeq, ignored []IgnoredSeqNumRange) bool { 54 // The ignored seqnum ranges are guaranteed to be 55 // non-overlapping, non-contiguous, and guaranteed to be 56 // sorted in seqnum order. We're going to look from the end to 57 // see if the current intent seqnum is ignored. 58 for i := len(ignored) - 1; i >= 0; i-- { 59 if seq < ignored[i].Start { 60 // The history entry's sequence number is lower/older than 61 // the current ignored range. Go to the previous range 62 // and try again. 63 continue 64 } 65 66 // Here we have a range where the start seqnum is lower than the current 67 // intent seqnum. Does it include it? 68 if seq > ignored[i].End { 69 // Here we have a range where the current history entry's seqnum 70 // is higher than the range's end seqnum. Given that the 71 // ranges are sorted, we're guaranteed that there won't 72 // be any further overlapping range at a lower value of i. 73 return false 74 } 75 // Yes, it's included. We're going to skip over this 76 // intent seqnum and retry the search above. 77 return true 78 } 79 80 // Exhausted the ignore list. Not ignored. 81 return false 82 } 83 84 // Short returns a prefix of the transaction's ID. 85 func (t TxnMeta) Short() string { 86 return t.ID.Short() 87 } 88 89 // Total returns the range size as the sum of the key and value 90 // bytes. This includes all non-live keys and all versioned values. 91 func (ms MVCCStats) Total() int64 { 92 return ms.KeyBytes + ms.ValBytes 93 } 94 95 // GCBytes is a convenience function which returns the number of gc bytes, 96 // that is the key and value bytes excluding the live bytes. 97 func (ms MVCCStats) GCBytes() int64 { 98 return ms.KeyBytes + ms.ValBytes - ms.LiveBytes 99 } 100 101 // AvgIntentAge returns the average age of outstanding intents, 102 // based on current wall time specified via nowNanos. 103 func (ms MVCCStats) AvgIntentAge(nowNanos int64) float64 { 104 if ms.IntentCount == 0 { 105 return 0 106 } 107 // Advance age by any elapsed time since last computed. Note that 108 // we operate on a copy. 109 ms.AgeTo(nowNanos) 110 return float64(ms.IntentAge) / float64(ms.IntentCount) 111 } 112 113 // GCByteAge returns the total age of outstanding gc'able 114 // bytes, based on current wall time specified via nowNanos. 115 // nowNanos is ignored if it's a past timestamp as seen by 116 // rs.LastUpdateNanos. 117 func (ms MVCCStats) GCByteAge(nowNanos int64) int64 { 118 ms.AgeTo(nowNanos) // we operate on a copy 119 return ms.GCBytesAge 120 } 121 122 // Forward is like AgeTo, but if nowNanos is not ahead of ms.LastUpdateNanos, 123 // this method is a noop. 124 func (ms *MVCCStats) Forward(nowNanos int64) { 125 if ms.LastUpdateNanos >= nowNanos { 126 return 127 } 128 ms.AgeTo(nowNanos) 129 } 130 131 // AgeTo encapsulates the complexity of computing the increment in age 132 // quantities contained in MVCCStats. Two MVCCStats structs only add and 133 // subtract meaningfully if their LastUpdateNanos matches, so aging them to 134 // the max of their LastUpdateNanos is a prerequisite, though Add() takes 135 // care of this internally. 136 func (ms *MVCCStats) AgeTo(nowNanos int64) { 137 // Seconds are counted every time each individual nanosecond timestamp 138 // crosses a whole second boundary (i.e. is zero mod 1E9). Thus it would 139 // be a mistake to use the (nonequivalent) expression (a-b)/1E9. 140 diffSeconds := nowNanos/1e9 - ms.LastUpdateNanos/1e9 141 142 ms.GCBytesAge += ms.GCBytes() * diffSeconds 143 ms.IntentAge += ms.IntentCount * diffSeconds 144 ms.LastUpdateNanos = nowNanos 145 } 146 147 // Add adds values from oms to ms. The ages will be moved forward to the 148 // larger of the LastUpdateNano timestamps involved. 149 func (ms *MVCCStats) Add(oms MVCCStats) { 150 // Enforce the max LastUpdateNanos for both ages based on their 151 // pre-addition state. 152 ms.Forward(oms.LastUpdateNanos) 153 oms.Forward(ms.LastUpdateNanos) // on local copy 154 155 ms.ContainsEstimates += oms.ContainsEstimates 156 157 // Now that we've done that, we may just add them. 158 ms.IntentAge += oms.IntentAge 159 ms.GCBytesAge += oms.GCBytesAge 160 ms.LiveBytes += oms.LiveBytes 161 ms.KeyBytes += oms.KeyBytes 162 ms.ValBytes += oms.ValBytes 163 ms.IntentBytes += oms.IntentBytes 164 ms.LiveCount += oms.LiveCount 165 ms.KeyCount += oms.KeyCount 166 ms.ValCount += oms.ValCount 167 ms.IntentCount += oms.IntentCount 168 ms.SysBytes += oms.SysBytes 169 ms.SysCount += oms.SysCount 170 } 171 172 // Subtract removes oms from ms. The ages will be moved forward to the larger of 173 // the LastUpdateNano timestamps involved. 174 func (ms *MVCCStats) Subtract(oms MVCCStats) { 175 // Enforce the max LastUpdateNanos for both ages based on their 176 // pre-subtraction state. 177 ms.Forward(oms.LastUpdateNanos) 178 oms.Forward(ms.LastUpdateNanos) 179 180 ms.ContainsEstimates -= oms.ContainsEstimates 181 182 // Now that we've done that, we may subtract. 183 ms.IntentAge -= oms.IntentAge 184 ms.GCBytesAge -= oms.GCBytesAge 185 ms.LiveBytes -= oms.LiveBytes 186 ms.KeyBytes -= oms.KeyBytes 187 ms.ValBytes -= oms.ValBytes 188 ms.IntentBytes -= oms.IntentBytes 189 ms.LiveCount -= oms.LiveCount 190 ms.KeyCount -= oms.KeyCount 191 ms.ValCount -= oms.ValCount 192 ms.IntentCount -= oms.IntentCount 193 ms.SysBytes -= oms.SysBytes 194 ms.SysCount -= oms.SysCount 195 } 196 197 // IsInline returns true if the value is inlined in the metadata. 198 func (meta MVCCMetadata) IsInline() bool { 199 return meta.RawBytes != nil 200 } 201 202 // AddToIntentHistory adds the sequence and value to the intent history. 203 func (meta *MVCCMetadata) AddToIntentHistory(seq TxnSeq, val []byte) { 204 meta.IntentHistory = append(meta.IntentHistory, 205 MVCCMetadata_SequencedIntent{Sequence: seq, Value: val}) 206 } 207 208 // GetPrevIntentSeq goes through the intent history and finds the previous 209 // intent's sequence number given the current sequence. 210 func (meta *MVCCMetadata) GetPrevIntentSeq( 211 seq TxnSeq, ignored []IgnoredSeqNumRange, 212 ) (MVCCMetadata_SequencedIntent, bool) { 213 end := len(meta.IntentHistory) 214 found := 0 215 for { 216 index := sort.Search(end, func(i int) bool { 217 return meta.IntentHistory[i].Sequence >= seq 218 }) 219 if index == 0 { 220 // It is possible that no intent exists such that the sequence is less 221 // than the read sequence. In this case, we cannot read a value from the 222 // intent history. 223 return MVCCMetadata_SequencedIntent{}, false 224 } 225 candidate := index - 1 226 if TxnSeqIsIgnored(meta.IntentHistory[candidate].Sequence, ignored) { 227 // This entry was part of an ignored range. Skip it and 228 // try the search again, using the current position as new 229 // upper bound. 230 end = candidate 231 continue 232 } 233 // This history entry has not been ignored, so we're going to keep it. 234 found = candidate 235 break 236 } 237 return meta.IntentHistory[found], true 238 } 239 240 // GetIntentValue goes through the intent history and finds the value 241 // written at the sequence number. 242 func (meta *MVCCMetadata) GetIntentValue(seq TxnSeq) ([]byte, bool) { 243 index := sort.Search(len(meta.IntentHistory), func(i int) bool { 244 return meta.IntentHistory[i].Sequence >= seq 245 }) 246 if index < len(meta.IntentHistory) && meta.IntentHistory[index].Sequence == seq { 247 return meta.IntentHistory[index].Value, true 248 } 249 return nil, false 250 } 251 252 // String implements the fmt.Stringer interface. 253 func (m *MVCCMetadata_SequencedIntent) String() string { 254 var buf strings.Builder 255 m.FormatW(&buf, false /* expand */) 256 return buf.String() 257 } 258 259 // Format implements the fmt.Formatter interface. 260 func (m *MVCCMetadata_SequencedIntent) Format(f fmt.State, r rune) { 261 m.FormatW(f, f.Flag('+')) 262 } 263 264 // FormatW enables grouping formatters around a single buffer while 265 // avoiding copies. 266 func (m *MVCCMetadata_SequencedIntent) FormatW(buf io.Writer, expand bool) { 267 fmt.Fprintf(buf, 268 "{%d %s}", 269 m.Sequence, 270 FormatBytesAsValue(m.Value)) 271 } 272 273 // String implements the fmt.Stringer interface. 274 func (meta *MVCCMetadata) String() string { 275 var buf strings.Builder 276 meta.FormatW(&buf, false /* expand */) 277 return buf.String() 278 } 279 280 // Format implements the fmt.Formatter interface. 281 func (meta *MVCCMetadata) Format(f fmt.State, r rune) { 282 meta.FormatW(f, f.Flag('+')) 283 } 284 285 // FormatW enables grouping formatters around a single buffer while 286 // avoiding copies. 287 func (meta *MVCCMetadata) FormatW(buf io.Writer, expand bool) { 288 fmt.Fprintf(buf, "txn={%s} ts=%s del=%t klen=%d vlen=%d", 289 meta.Txn, 290 meta.Timestamp, 291 meta.Deleted, 292 meta.KeyBytes, 293 meta.ValBytes, 294 ) 295 if len(meta.RawBytes) > 0 { 296 if expand { 297 fmt.Fprintf(buf, " raw=%s", FormatBytesAsValue(meta.RawBytes)) 298 } else { 299 fmt.Fprintf(buf, " rawlen=%d", len(meta.RawBytes)) 300 } 301 } 302 if nih := len(meta.IntentHistory); nih > 0 { 303 if expand { 304 fmt.Fprint(buf, " ih={") 305 for i := range meta.IntentHistory { 306 meta.IntentHistory[i].FormatW(buf, expand) 307 } 308 fmt.Fprint(buf, "}") 309 } else { 310 fmt.Fprintf(buf, " nih=%d", nih) 311 } 312 } 313 } 314 315 func (meta *MVCCMetadataSubsetForMergeSerialization) String() string { 316 var m MVCCMetadata 317 m.RawBytes = meta.RawBytes 318 m.MergeTimestamp = meta.MergeTimestamp 319 return m.String() 320 } 321 322 // SafeMessage implements the SafeMessager interface. 323 // 324 // This method should be kept largely synchronized with String(), except that it 325 // can't include sensitive info (e.g. the transaction key). 326 func (meta *MVCCMetadata) SafeMessage() string { 327 var buf strings.Builder 328 fmt.Fprintf(&buf, "{%s} ts=%s del=%t klen=%d vlen=%d", 329 meta.Txn.SafeMessage(), 330 meta.Timestamp, 331 meta.Deleted, 332 meta.KeyBytes, 333 meta.ValBytes, 334 ) 335 if len(meta.RawBytes) > 0 { 336 fmt.Fprintf(&buf, " rawlen=%d", len(meta.RawBytes)) 337 } 338 if nih := len(meta.IntentHistory); nih > 0 { 339 fmt.Fprintf(&buf, " nih=%d", nih) 340 } 341 return buf.String() 342 } 343 344 // String implements the fmt.Stringer interface. 345 // We implement by value as the object may not reside on the heap. 346 func (t TxnMeta) String() string { 347 var buf strings.Builder 348 t.FormatW(&buf) 349 return buf.String() 350 } 351 352 // FormatW enables grouping formatters around a single buffer while 353 // avoiding copies. 354 // We implement by value as the object may not reside on the heap. 355 func (t TxnMeta) FormatW(buf io.Writer) { 356 // Compute priority as a floating point number from 0-100 for readability. 357 floatPri := 100 * float64(t.Priority) / float64(math.MaxInt32) 358 fmt.Fprintf(buf, 359 "id=%s key=%s pri=%.8f epo=%d ts=%s min=%s seq=%d", 360 t.Short(), 361 FormatBytesAsKey(t.Key), 362 floatPri, 363 t.Epoch, 364 t.WriteTimestamp, 365 t.MinTimestamp, 366 t.Sequence) 367 } 368 369 // SafeMessage implements the SafeMessager interface. 370 // 371 // This method should be kept largely synchronized with String(), except that it 372 // can't include sensitive info (e.g. the transaction key). 373 // 374 // We implement by value as the object may not reside on the heap. 375 func (t TxnMeta) SafeMessage() string { 376 var buf strings.Builder 377 // Compute priority as a floating point number from 0-100 for readability. 378 floatPri := 100 * float64(t.Priority) / float64(math.MaxInt32) 379 fmt.Fprintf(&buf, 380 "id=%s pri=%.8f epo=%d ts=%s min=%s seq=%d", 381 t.Short(), 382 floatPri, 383 t.Epoch, 384 t.WriteTimestamp, 385 t.MinTimestamp, 386 t.Sequence) 387 return buf.String() 388 } 389 390 var _ errors.SafeMessager = (*TxnMeta)(nil) 391 392 // FormatBytesAsKey is injected by module roachpb as dependency upon initialization. 393 var FormatBytesAsKey = func(k []byte) string { 394 return string(k) 395 } 396 397 // FormatBytesAsValue is injected by module roachpb as dependency upon initialization. 398 var FormatBytesAsValue = func(v []byte) string { 399 return string(v) 400 }