github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/roachpb/api.proto (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 syntax = "proto3"; 12 package cockroach.roachpb; 13 option go_package = "roachpb"; 14 15 import "kv/kvserver/concurrency/lock/locking.proto"; 16 import "roachpb/data.proto"; 17 import "roachpb/errors.proto"; 18 import "roachpb/metadata.proto"; 19 import "storage/enginepb/mvcc.proto"; 20 import "storage/enginepb/mvcc3.proto"; 21 import "util/hlc/timestamp.proto"; 22 import "util/tracing/recorded_span.proto"; 23 import "gogoproto/gogo.proto"; 24 25 // ReadConsistencyType specifies what type of consistency is observed 26 // during read operations. 27 enum ReadConsistencyType { 28 option (gogoproto.goproto_enum_prefix) = false; 29 30 // CONSISTENT reads are guaranteed to read committed data; the 31 // mechanism relies on clocks to determine lease expirations. 32 CONSISTENT = 0; 33 // READ_UNCOMMITTED reads return both committed and uncommitted data. 34 // The consistency type is similar to INCONSISTENT in that using it 35 // can result in dirty reads. However, like the CONSISTENT type, it 36 // requires the replica performing the read to hold a valid read lease, 37 // meaning that it can't return arbitrarily stale data. 38 READ_UNCOMMITTED = 1; 39 // INCONSISTENT reads return the latest available, committed values. 40 // They are more efficient, but may read stale values as pending 41 // intents are ignored. 42 INCONSISTENT = 2; 43 } 44 45 // RangeInfo describes a range which executed a request. It contains 46 // the range descriptor and lease information at the time of execution. 47 message RangeInfo { 48 RangeDescriptor desc = 1 [(gogoproto.nullable) = false]; 49 Lease lease = 2 [(gogoproto.nullable) = false]; 50 } 51 52 // RequestHeader is supplied with every storage node request. 53 message RequestHeader { 54 option (gogoproto.equal) = true; 55 56 reserved 1, 2; 57 // The key for request. If the request operates on a range, this 58 // represents the starting key for the range. 59 bytes key = 3 [(gogoproto.casttype) = "Key"]; 60 // The end key is empty if the request spans only a single key. Otherwise, 61 // it must order strictly after Key. In such a case, the header indicates 62 // that the operation takes place on the key range from Key to EndKey, 63 // including Key and excluding EndKey. 64 bytes end_key = 4 [(gogoproto.casttype) = "Key"]; 65 // A zero-indexed transactional sequence number. 66 int32 sequence = 5 [ 67 (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/storage/enginepb.TxnSeq"]; 68 } 69 70 // ResponseHeader is returned with every storage node response. 71 message ResponseHeader { 72 enum ResumeReason { 73 option (gogoproto.goproto_enum_prefix) = false; 74 // Zero value; no resume. 75 RESUME_UNKNOWN = 0; 76 // The spanning operation didn't finish because the key limit was 77 // exceeded. 78 RESUME_KEY_LIMIT = 1; 79 } 80 81 // txn is non-nil if the request specified a non-nil transaction. 82 // The transaction timestamp and/or priority may have been updated, 83 // depending on the outcome of the request. 84 // 85 // Once txn is merged into the BatchResponse_Header.Txn, it will be 86 // reset to nil to avoid sending superfluous information over the 87 // network. 88 Transaction txn = 3; 89 // The next span to resume from when the response doesn't cover the full span 90 // requested. This can happen when a bound on the keys is set through 91 // max_span_request_keys in the batch header or when a scan has been stopped 92 // before covering the requested data because of scan_options. 93 // 94 // ResumeSpan is unset when the entire span of keys have been 95 // operated on. The span is set to the original span if the request 96 // was ignored because max_span_request_keys was hit due to another 97 // request in the batch. For a reverse scan the end_key is updated. 98 Span resume_span = 4; 99 // When resume_span is populated, this specifies the reason why the operation 100 // wasn't completed and needs to be resumed. 101 // This field appeared in v2.0. Responses from storage coming from older 102 // servers will not contain it, but the conversion from a BatchResponse to a 103 // client.Result always fills it in. 104 ResumeReason resume_reason = 7; 105 106 // The number of keys operated on. 107 int64 num_keys = 5; 108 // The number of bytes returned. Only populated for requests that support it 109 // (at the time of writing, Scan and ReverseScan). The number returned here 110 // corresponds to the (Header).TargetBytes field and loosely measures the 111 // bytes in the timestamps, keys, and values of the returned rows. 112 int64 num_bytes = 8; 113 // Range or list of ranges used to execute the request. Multiple 114 // ranges may be returned for Scan, ReverseScan or DeleteRange. 115 repeated RangeInfo range_infos = 6 [(gogoproto.nullable) = false]; 116 } 117 118 // A GetRequest is the argument for the Get() method. 119 message GetRequest { 120 option (gogoproto.equal) = true; 121 122 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 123 } 124 125 // A GetResponse is the return value from the Get() method. 126 // If the key doesn't exist, Value will be nil. 127 message GetResponse { 128 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 129 Value value = 2; 130 131 // The intent seen, if any, when using the READ_UNCOMMITTED consistency level. 132 // 133 // NOTE: this field is not currently populated with intents for deletion 134 // tombstones. It probably should be because the value field may contain a 135 // value that is being deleted by a corresponding intent. We should revisit 136 // this decision if this ever becomes a problem. 137 Value intent_value = 3; 138 } 139 140 // A PutRequest is the argument to the Put() method. 141 message PutRequest { 142 option (gogoproto.equal) = true; 143 144 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 145 Value value = 2 [(gogoproto.nullable) = false]; 146 // Specify as true to put the value without a corresponding 147 // timestamp. This option should be used with care as it precludes 148 // the use of this value with transactions. 149 bool inline = 3; 150 // NOTE: For internal use only! Set to indicate that the put is 151 // writing to virgin keyspace and no reads are necessary to 152 // rationalize MVCC. 153 bool blind = 4; 154 } 155 156 // A PutResponse is the return value from the Put() method. 157 message PutResponse { 158 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 159 } 160 161 // A ConditionalPutRequest is the argument to the ConditionalPut() method. 162 // 163 // - Returns true and sets value if exp_value equals existing value. 164 // - If key doesn't exist and exp_value is nil, sets value. 165 // - If key exists, but value is empty and exp_value is not nil but empty, sets value. 166 // - Otherwise, returns a ConditionFailedError containing the actual value of the key. 167 // 168 // Note that the client is free to send more requests after a 169 // ConditionFailedError. This is not generally allowed after other errors 170 // because of fears over the ambiguity of the side-effects of failed requests 171 // (in particular, the timestamps at which intents might have been written). 172 // ConditionFailedError is a special case as we ensure there's no ambiguity; the 173 // error carries a WriteTimestamp that's the upper bound of the timestamps 174 // intents were written at. 175 message ConditionalPutRequest { 176 option (gogoproto.equal) = true; 177 178 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 179 // The value to put. 180 Value value = 2 [(gogoproto.nullable) = false]; 181 // Set exp_value.bytes empty to test for non-existence. Specify as nil 182 // to indicate there should be no existing entry. This is different 183 // from the expectation that the value exists but is empty. 184 Value exp_value = 3; 185 // NOTE: For internal use only! Set to indicate that the put is 186 // writing to virgin keyspace and no reads are necessary to 187 // rationalize MVCC. 188 bool blind = 4; 189 // Typically if a specific, non-empty expected value is supplied, it *must* 190 // exist with that value. Passing this indicates that it is also OK if the key 191 // does not exist. This is useful when a given value is expected but it is 192 // possible it has not yet been written. 193 bool allow_if_does_not_exist = 5; 194 } 195 196 // A ConditionalPutResponse is the return value from the 197 // ConditionalPut() method. 198 message ConditionalPutResponse { 199 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 200 } 201 202 // An InitPutRequest is the argument to the InitPut() method. 203 // 204 // - If key doesn't exist, sets value. 205 // - If key exists, returns a ConditionFailedError if value != existing value 206 // If failOnTombstones is set to true, tombstone values count as mismatched 207 // values and will cause a ConditionFailedError. 208 message InitPutRequest { 209 option (gogoproto.equal) = true; 210 211 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 212 Value value = 2 [(gogoproto.nullable) = false]; 213 // NOTE: For internal use only! Set to indicate that the put is 214 // writing to virgin keyspace and no reads are necessary to 215 // rationalize MVCC. 216 bool blind = 3; 217 // If true, tombstones cause ConditionFailedErrors. 218 bool failOnTombstones = 4; 219 } 220 221 // A InitPutResponse is the return value from the InitPut() method. 222 message InitPutResponse { 223 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 224 } 225 226 // An IncrementRequest is the argument to the Increment() method. It 227 // increments the value for key, and returns the new value. If no 228 // value exists for a key, incrementing by 0 is not a noop, but will 229 // create a zero value. IncrementRequest cannot be called on a key set 230 // by Put() or ConditionalPut(). Similarly, Put() and ConditionalPut() 231 // cannot be invoked on an incremented key. 232 message IncrementRequest { 233 option (gogoproto.equal) = true; 234 235 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 236 int64 increment = 2; 237 } 238 239 // An IncrementResponse is the return value from the Increment 240 // method. The new value after increment is specified in NewValue. If 241 // the value could not be decoded as specified, Error will be set. 242 message IncrementResponse { 243 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 244 int64 new_value = 2; 245 } 246 247 // A DeleteRequest is the argument to the Delete() method. 248 message DeleteRequest { 249 option (gogoproto.equal) = true; 250 251 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 252 } 253 254 // A DeleteResponse is the return value from the Delete() method. 255 message DeleteResponse { 256 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 257 } 258 259 // A DeleteRangeRequest is the argument to the DeleteRange() method. It 260 // specifies the range of keys to delete. 261 // 262 // A DeleteRangeRequest populates the timestamp cache and is tracked for 263 // refreshes. 264 message DeleteRangeRequest { 265 option (gogoproto.equal) = true; 266 267 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 268 reserved 2; 269 // return the keys that are deleted in the response. 270 bool return_keys = 3; 271 // delete "inline" keys which are stored without MVCC timestamps. Note that 272 // an "inline" DeleteRange will fail if it attempts to delete any keys which 273 // contain timestamped (non-inline) values; this option should only be used on 274 // keys which are known to store inline values, such as data in cockroach's 275 // time series system. 276 // 277 // Similarly, attempts to delete keys with inline values will fail unless this 278 // flag is set to true; the setting must match the data being deleted. 279 // 280 // Inline values cannot be deleted transactionally; a DeleteRange with 281 // "inline" set to true will fail if it is executed within a transaction. 282 bool inline = 4; 283 } 284 285 // A DeleteRangeResponse is the return value from the DeleteRange() 286 // method. 287 message DeleteRangeResponse { 288 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 289 // All the deleted keys if return_keys is set. 290 repeated bytes keys = 2 [(gogoproto.casttype) = "Key"]; 291 } 292 293 // A ClearRangeRequest is the argument to the ClearRange() method. It 294 // specifies a range of keys to clear from the underlying engine. Note 295 // that this differs from the behavior of DeleteRange, which sets 296 // transactional intents and writes tombstones to the deleted 297 // keys. ClearRange is used when permanently dropping or truncating 298 // table data. 299 // 300 // ClearRange also updates the GC threshold for the range to the 301 // timestamp at which this command executes, to prevent reads at 302 // earlier timestamps from incorrectly returning empty results. 303 // 304 // NOTE: it is important that this method only be invoked on a key 305 // range which is guaranteed to be both inactive and not see future 306 // writes. Ignoring this warning may result in data loss. 307 message ClearRangeRequest { 308 option (gogoproto.equal) = true; 309 310 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 311 } 312 313 // A ClearRangeResponse is the return value from the ClearRange() method. 314 message ClearRangeResponse { 315 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 316 } 317 318 319 // A RevertRangeRequest specifies a range of keys in which to clear all MVCC 320 // revisions more recent than some TargetTime from the underlying engine, thus 321 // reverting the range (from the perspective of an MVCC scan) to that time. 322 message RevertRangeRequest { 323 option (gogoproto.equal) = true; 324 325 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 326 327 // TargetTime specifies a the time to which to "revert" the range by clearing 328 // any MVCC key with a strictly higher timestamp. TargetTime must be higher 329 // than the GC Threshold for the replica - so that it is assured that the keys 330 // for that time are still there — or the request will fail. 331 util.hlc.Timestamp target_time = 2 [(gogoproto.nullable) = false]; 332 } 333 334 // A RevertRangeResponse is the return value from the RevertRange() method. 335 message RevertRangeResponse { 336 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 337 } 338 339 // ScanFormat is an enumeration of the available response formats for MVCCScan 340 // operations. 341 enum ScanFormat { 342 option (gogoproto.goproto_enum_prefix) = false; 343 344 // The standard MVCCScan format: a slice of KeyValue messages. 345 KEY_VALUES = 0; 346 // The batch_response format: a byte slice of alternating keys and values, 347 // each prefixed by their length as a varint. 348 BATCH_RESPONSE = 1; 349 } 350 351 352 // A ScanRequest is the argument to the Scan() method. It specifies the 353 // start and end keys for an ascending scan of [start,end) and the maximum 354 // number of results (unbounded if zero). 355 message ScanRequest { 356 option (gogoproto.equal) = true; 357 358 reserved 2, 3; 359 360 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 361 362 // The desired format for the response. If set to BATCH_RESPONSE, the server 363 // will set the batch_responses field in the ScanResponse instead of the rows 364 // field. 365 ScanFormat scan_format = 4; 366 367 // The desired key-level locking mode used during this scan. When set to None 368 // (the default), no key-level locking mode is used - meaning that the scan 369 // does not acquire any locks. When set to any other strength, a lock of that 370 // strength is acquired with the Unreplicated durability (i.e. best-effort) on 371 // each of the keys scanned by the request, subject to any key limit applied 372 // to the batch which limits the number of keys returned. 373 // 374 // NOTE: the locks acquire with this strength are point locks on each of the 375 // keys returned by the request, not a single range lock over the entire span 376 // scanned by the request. 377 kv.kvserver.concurrency.lock.Strength key_locking = 5; 378 } 379 380 // A ScanResponse is the return value from the Scan() method. 381 message ScanResponse { 382 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 383 // Empty if no rows were scanned. 384 repeated KeyValue rows = 2 [(gogoproto.nullable) = false]; 385 // The intent rows seen when performing a scan at the READ_UNCOMMITTED 386 // consistency level. These rows do not count against the MaxSpanRequestKeys 387 // count. 388 // 389 // NOTE: this field is not currently populated with intents for deletion 390 // tombstones. It probably should be because the rows field may contain 391 // key-values that are being deleted by corresponding intents. We should 392 // revisit this decision if this ever becomes a problem. 393 repeated KeyValue intent_rows = 3 [(gogoproto.nullable) = false]; 394 395 // If set, each item in this repeated bytes field contains part of the results 396 // in batch format - the key/value pairs are a buffer of varint-prefixed 397 // slices, alternating from key to value. Each entry in this field is 398 // complete - there are no key/value pairs that are split across more than one 399 // entry. There are num_keys total pairs across all entries, as defined by the 400 // ResponseHeader. If set, rows will not be set and vice versa. 401 repeated bytes batch_responses = 4; 402 } 403 404 // A ReverseScanRequest is the argument to the ReverseScan() method. It specifies the 405 // start and end keys for a descending scan of [start,end) and the maximum 406 // number of results (unbounded if zero). 407 message ReverseScanRequest { 408 option (gogoproto.equal) = true; 409 410 reserved 2, 3; 411 412 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 413 414 // The desired format for the response. If set to BATCH_RESPONSE, the server 415 // will set the batch_responses field in the ScanResponse instead of the rows 416 // field. 417 ScanFormat scan_format = 4; 418 419 // The desired key-level locking mode used during this scan. When set to None 420 // (the default), no key-level locking mode is used - meaning that the scan 421 // does not acquire any locks. When set to any other strength, a lock of that 422 // strength is acquired with the Unreplicated durability (i.e. best-effort) on 423 // each of the keys scanned by the request, subject to any key limit applied 424 // to the batch which limits the number of keys returned. 425 // 426 // NOTE: the locks acquire with this strength are point locks on each of the 427 // keys returned by the request, not a single range lock over the entire span 428 // scanned by the request. 429 kv.kvserver.concurrency.lock.Strength key_locking = 5; 430 } 431 432 // A ReverseScanResponse is the return value from the ReverseScan() method. 433 message ReverseScanResponse { 434 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 435 // Empty if no rows were scanned. 436 repeated KeyValue rows = 2 [(gogoproto.nullable) = false]; 437 // The intent rows seen when performing a scan at the READ_UNCOMMITTED 438 // consistency level. These rows do not count against the MaxSpanRequestKeys 439 // count. 440 // 441 // NOTE: this field is not currently populated with intents for deletion 442 // tombstones. It probably should be because the rows field may contain 443 // key-values that are being deleted by corresponding intents. We should 444 // revisit this decision if this ever becomes a problem. 445 repeated KeyValue intent_rows = 3 [(gogoproto.nullable) = false]; 446 447 // If set, each item in this repeated bytes field contains part of the results 448 // in batch format - the key/value pairs are a buffer of varint-prefixed 449 // slices, alternating from key to value. Each entry in this field is 450 // complete - there are no key/value pairs that are split across more than one 451 // entry. There are num_keys total pairs across all entries, as defined by the 452 // ResponseHeader. If set, rows will not be set and vice versa. 453 repeated bytes batch_responses = 4; 454 } 455 456 457 enum ChecksumMode { 458 // CHECK_VIA_QUEUE is set for requests made from the consistency queue. In 459 // this mode, a full check is carried out, and depending on the result a 460 // recursive consistency check is triggered: 461 // 462 // 1. no inconsistency found: if recomputed stats don't match persisted stats, 463 // trigger a RecomputeStatsRequest. 464 // 2. inconsistency found: if a diff is available, print it and trigger fatal 465 // error. If no diff found, trigger recursive check with diff requested 466 // (which then triggers fatal error). 467 // 468 // TODO(tbg): these semantics are an artifact of how consistency checks were 469 // first implemented. The extra behavior here should move to the consistency 470 // check queue instead and this option dropped from the enum. 471 CHECK_VIA_QUEUE = 0; 472 // CHECK_FULL recomputes the hash of the replicate data in all replicas and 473 // uses this to determine whether there is an inconsistency. 474 CHECK_FULL = 1; 475 // CHECK_STATS only hashes the persisted lease applied state (which notably 476 // includes the persisted MVCCStats) only. This catches a large class of 477 // replica inconsistencies observed in the wild (where replicas apply a 478 // nonidentical log of commands, and as a result almost always have 479 // divergent stats), while doing work independent of the size of the data 480 // contained in the replicas. 481 CHECK_STATS = 2; 482 } 483 484 // A CheckConsistencyRequest is the argument to the CheckConsistency() method. 485 // It specifies the start and end keys for a span of ranges to which a 486 // consistency check should be applied. A consistency check on a range involves 487 // running a ComputeChecksum on the range followed by a storage.CollectChecksum. 488 message CheckConsistencyRequest { 489 option (gogoproto.equal) = true; 490 491 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 492 // log a diff of inconsistencies if such inconsistencies are found. This is only 493 // valid if mode == FROM_QUEUE 494 bool with_diff = 2; 495 ChecksumMode mode = 3; 496 // Whether to create a RocksDB checkpoint on each replica at the log position 497 // at which the SHA is computed. The checkpoint is essentially a cheap point- 498 // in-time backup of the database. It will be put into the engines' auxiliary 499 // directory and needs to be removed manually to avoid leaking disk space. 500 bool checkpoint = 4; 501 // A list of nodes that the consistency check wants to terminate. This is 502 // typically set when Checkpoint above is also set, as part of a second round 503 // after a first consistency check that did find a divergence. The second 504 // round is concerned with damage control and wants the nodes it suspects hold 505 // anomalous data to be shut down, so that this data isn't served to clients 506 // (or worse, spread to other replicas). 507 repeated ReplicaDescriptor terminate = 5 [(gogoproto.nullable) = false]; 508 } 509 510 // A CheckConsistencyResponse is the return value from the CheckConsistency() method. 511 // It returns the status the range was found in. 512 message CheckConsistencyResponse { 513 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 514 515 enum Status { 516 // No inconsistency was detected, but not all replicas returned a checksum. 517 RANGE_INDETERMINATE = 0; 518 // A definite inconsistency was detected. 519 RANGE_INCONSISTENT = 1; 520 // All replicas of the range agreed on the checksum. 521 RANGE_CONSISTENT = 2; 522 // Like RANGE_CONSISTENT, but the recomputed stats disagreed with the 523 // persisted stats. The persisted stats indicates estimates, so this is 524 // expected. 525 RANGE_CONSISTENT_STATS_ESTIMATED = 3; 526 // Like RANGE_CONSISTENT_STATS_ESTIMATED, but the mismatch occurred with 527 // persisted stats that claimed to be accurate. This is unexpected and 528 // likely indicates a bug in our logic to incrementally update the stats 529 // as commands are evaluated and applied. 530 RANGE_CONSISTENT_STATS_INCORRECT = 4; 531 } 532 533 message Result { 534 int64 range_id = 1 [(gogoproto.customname) = "RangeID", (gogoproto.casttype) = "RangeID"]; 535 // start_key of the range corresponding to range_id (at the time of the 536 // check). This is useful to send additional requests to only a subset of 537 // ranges contained within a result later, as requests can only be routed by 538 // key. 539 bytes start_key = 2; 540 Status status = 3; 541 // detail contains information related to the operation. If no inconsistency 542 // is found, it contains informational value such as observed stats. If an 543 // inconsistency is found, it contains information about that inconsistency 544 // including the involved replica and, if requested, the diff. 545 string detail = 4; 546 } 547 548 // result contains a Result for each Range checked, in no particular order. 549 repeated Result result = 2 [(gogoproto.nullable) = false]; 550 } 551 552 // An RecomputeStatsRequest triggers a stats recomputation on the Range addressed by 553 // the request. 554 // 555 // An error will be returned if the start key does not match the start key of the 556 // target Range. 557 // 558 // The stats recomputation touches essentially the whole range, but the command 559 // avoids having to block other commands by taking care to not interleave 560 // with splits, and by using the commutativity of stats updates. As a result, 561 // it is safe to invoke at any time, including repeatedly, though it should be 562 // used conservatively due to performing a full scan of the Range. 563 message RecomputeStatsRequest { 564 option (gogoproto.equal) = true; 565 566 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 567 // When dry_run is true, the stats delta is computed, but no stats adjustment 568 // is performed. This isn't useful outside of testing since RecomputeStats is 569 // safe and idempotent. 570 bool dry_run = 2; 571 } 572 573 // An RecomputeStatsResponse is the response to an RecomputeStatsRequest. 574 message RecomputeStatsResponse { 575 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 576 577 // added_delta is the adjustment made to the range's stats, i.e. `new_stats = old_stats + added_delta`. 578 storage.enginepb.MVCCStatsDelta added_delta = 2 [(gogoproto.nullable) = false]; 579 } 580 581 // An EndTxnRequest is the argument to the EndTxn() method. It specifies 582 // whether to commit or roll back an extant transaction. 583 message EndTxnRequest { 584 option (gogoproto.equal) = true; 585 586 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 587 // False to abort and rollback. 588 bool commit = 2; 589 // If set, deadline represents the maximum (exclusive) timestamp at which the 590 // transaction can commit (i.e. the maximum timestamp for the txn's reads and 591 // writes). 592 // If EndTxn(Commit=true) finds that the txn's timestamp has been pushed above 593 // this deadline, an error will be returned and the client is supposed to 594 // rollback the txn. 595 util.hlc.Timestamp deadline = 3; 596 // commit triggers. Note that commit triggers are for 597 // internal use only and will cause an error if requested through the 598 // external-facing KV API. 599 InternalCommitTrigger internal_commit_trigger = 4; 600 // Set of spans that the transaction has acquired locks within. These are 601 // spans which must be resolved on txn completion. Note that these spans 602 // may be condensed to cover aggregate spans if the keys locked by the 603 // transaction exceeded a size threshold. 604 // 605 // The set logically extends to include the keys of all writes in the 606 // in-flight write set. However, those keys are not stored in this set 607 // to avoid duplication. This means that elements that are removed from 608 // that set should be merged into this one. 609 // 610 // The slice is maintained in sorted order and all spans are maximally 611 // merged such that no two spans here overlap each other. 612 repeated Span lock_spans = 5 [(gogoproto.nullable) = false]; 613 // Set of in-flight intent writes that have been issued by the transaction but 614 // which may not have succeeded yet. If any promised writes are provided, a 615 // committing EndTxn request will move a PENDING transaction to the STAGING 616 // status instead of the COMMITTED status. These in-flight writes must then 617 // all be confirmed as successful before the transaction can be moved from 618 // STAGING to COMMITTED. For more, see txnCommitter. 619 // 620 // The slice is maintained in sorted order by sequence number. This provides 621 // O(log n) access to individual writes in this set based on their sequence 622 // number. See SequencedWriteBySeq.Find and its uses. The set can contain 623 // multiple SequencedWrites with the same key, but all sequence numbers are 624 // unique. 625 repeated SequencedWrite in_flight_writes = 17 [(gogoproto.nullable) = false]; 626 // Requires that the transaction completes as a 1 phase commit. This 627 // guarantees that all writes are to the same range and that no 628 // intents are left in the event of an error. 629 // 630 // Note(andrei): Use this flag with care; retriable errors are not generated 631 // reliably for these transactions - a TransactionStatusError might be 632 // returned instead if 1PC execution fails. 633 bool require_1pc = 6 [(gogoproto.customname) = "Require1PC"]; 634 // CanCommitAtHigherTimestamp indicates that the batch this EndTxn is part of 635 // can be evaluated at a higher timestamp than the transaction's read 636 // timestamp. This is set by the client if the transaction has not performed 637 // any reads that must be refreshed prior to sending this current batch. When 638 // set, it allows the server to handle pushes and write too old conditions 639 // locally. 640 // TODO(nvanbenschoten): remove this in favor of can_forward_read_timestamp 641 // in v20.2. 642 bool can_commit_at_higher_timestamp = 8; 643 // True to indicate that lock spans should be resolved with poison=true. 644 // This is used when the transaction is being aborted independently of the 645 // main thread of client operation, as in the case of an asynchronous abort 646 // from the TxnCoordSender on a failed heartbeat. It should only be set to 647 // true when commit=false. 648 bool poison = 9; 649 reserved 7; 650 } 651 652 // An EndTxnResponse is the return value from the EndTxn() method. The final 653 // transaction record is returned as part of the response header. In particular, 654 // transaction status and timestamp will be updated to reflect final committed 655 // values. Clients may propagate the transaction timestamp as the final txn 656 // commit timestamp in order to preserve causal ordering between subsequent 657 // transactions. 658 message EndTxnResponse { 659 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 660 reserved 2; 661 reserved 3; 662 // True if the transaction committed on the one phase commit path. 663 // This means that all writes which were part of the transaction 664 // were written as a single, atomic write batch to just one range. 665 bool one_phase_commit = 4; 666 // The commit timestamp of the STAGING transaction record written 667 // by the request. Only set if the transaction record was staged. 668 util.hlc.Timestamp staging_timestamp = 5 [(gogoproto.nullable) = false]; 669 } 670 671 // An AdminSplitRequest is the argument to the AdminSplit() method. The 672 // existing range which contains header.key is split by 673 // split_key. If split_key is not specified, then this method will 674 // determine a split key that is roughly halfway through the 675 // range. The existing range is resized to cover only its start key to 676 // the split key. The new range created by the split starts at the 677 // split key and extends to the original range's end key. If split_key 678 // is known, header.key should also be set to split_key. 679 // 680 // New range IDs for each of the split range's replica and a new Raft 681 // ID are generated by the operation. Split requests are done in the 682 // context of a distributed transaction which updates range addressing 683 // records, range metadata and finally, provides a commit trigger to 684 // update bookkeeping and instantiate the new range on commit. 685 // 686 // The new range contains range replicas located on the same stores; 687 // no range data is moved during this operation. The split can be 688 // thought of as a mostly logical operation, though some other 689 // metadata (e.g. abort span and range stats must be copied or 690 // recomputed). 691 // 692 // expiration_time represents the time that this split expires. Any split that 693 // is not expired will not be considered for automatic merging by the merge 694 // queue. Any split requested by the split queue will have an expiration time 695 // of hlc.Timestamp{} (I.E. The zero timestamp so they are always eligible for 696 // automatic merging). 697 message AdminSplitRequest { 698 option (gogoproto.equal) = true; 699 700 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 701 bytes split_key = 2 [(gogoproto.casttype) = "Key"]; 702 reserved 3; 703 util.hlc.Timestamp expiration_time = 4 [(gogoproto.nullable) = false]; 704 } 705 706 // An AdminSplitResponse is the return value from the AdminSplit() 707 // method. 708 message AdminSplitResponse { 709 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 710 } 711 712 // An AdminUnsplitRequest is the argument to the AdminUnsplit() 713 // method. The sticky bit of the existing range whose starting key is 714 // header.key is removed. 715 // 716 // Ranges that do not have the sticky bit set are eligible for 717 // automatic merging. 718 message AdminUnsplitRequest { 719 option (gogoproto.equal) = true; 720 721 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 722 } 723 724 // An AdminUnsplitResponse is the return value from the 725 // AdminUnsplit() method. 726 message AdminUnsplitResponse { 727 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 728 } 729 730 // An AdminMergeRequest is the argument to the AdminMerge() method. A 731 // merge is performed by calling AdminMerge on the left-hand range of 732 // two consecutive ranges (i.e. the range which contains keys which 733 // sort first). This range will be the subsuming range and the right 734 // hand range will be subsumed. After the merge operation, the 735 // subsumed range will no longer exist and the subsuming range will 736 // now encompass all keys from its original start key to the end key 737 // of the subsumed range. If AdminMerge is called on the final range 738 // in the key space, it is a noop. 739 // The request must be addressed to the start key of the left hand side. 740 message AdminMergeRequest { 741 option (gogoproto.equal) = true; 742 743 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 744 } 745 746 // An AdminMergeResponse is the return value from the AdminMerge() 747 // method. 748 message AdminMergeResponse { 749 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 750 } 751 752 // An AdminTransferLeaseRequest is the argument to the AdminTransferLease() 753 // method. A lease transfer allows an external entity to control the lease 754 // holder for a range. The target of the lease transfer needs to be a valid 755 // replica of the range. 756 message AdminTransferLeaseRequest { 757 option (gogoproto.equal) = true; 758 759 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 760 int32 target = 2 [(gogoproto.casttype) = "StoreID"]; 761 } 762 763 message AdminTransferLeaseResponse { 764 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 765 } 766 767 // A ReplicationChange specifies the type and target of a replication change operation. 768 message ReplicationChange { 769 option (gogoproto.equal) = true; 770 771 ReplicaChangeType change_type = 1; 772 ReplicationTarget target = 2 [(gogoproto.nullable) = false]; 773 } 774 775 // An AdminChangeReplicasRequest is the argument to the AdminChangeReplicas() 776 // method. A change replicas operation allows adding or removing a set of 777 // replicas for a range. 778 message AdminChangeReplicasRequest { 779 option (gogoproto.equal) = true; 780 781 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 782 // Never access directly: use .Changes() 783 // 784 // TODO(tbg): remove in 20.1 785 ReplicaChangeType deprecated_change_type = 2; 786 // Never access directly: use .Changes() 787 // 788 // TODO(tbg): remove in 20.1 789 repeated ReplicationTarget deprecated_targets = 3 [(gogoproto.nullable) = false]; 790 // ExpDesc is the expected current range descriptor to modify. If the range 791 // descriptor is not identical to ExpDesc for the request will fail. 792 // 793 // If there is more than one change specified in targets, this expectation 794 // will be applied to the first change and subsequent changes will use the 795 // resultant descriptor from successfully applying the previous change. 796 // If a change with more than one target occurs concurrently with another 797 // it is possible that an error will occur after partial application of the 798 // change. Changes are applied in the order they appear in the request. 799 RangeDescriptor exp_desc = 4 [(gogoproto.nullable) = false]; 800 801 // The changes to apply to exp_desc. Never access directly: use .Changes(). 802 // 803 // TODO(tbg): rename to 'changes' in 20.1 and remove Changes(). 804 repeated ReplicationChange internal_changes = 5 [(gogoproto.nullable) = false]; 805 } 806 807 message AdminChangeReplicasResponse { 808 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 809 // Desc is the value of the range descriptor upon success. 810 RangeDescriptor desc = 2 [(gogoproto.nullable) = false]; 811 } 812 813 // An AdminRelocateRangeRequest is the argument to the AdminRelocateRange() 814 // method. Relocates the replicas for a range to the specified target stores. 815 // The first store in the list of targets becomes the new leaseholder. 816 message AdminRelocateRangeRequest { 817 option (gogoproto.equal) = true; 818 819 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 820 repeated ReplicationTarget targets = 2 [(gogoproto.nullable) = false]; 821 // TODO(a-robinson): Add "reason"/"details" string fields? 822 } 823 824 message AdminRelocateRangeResponse { 825 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 826 } 827 828 // A HeartbeatTxnRequest is arguments to the HeartbeatTxn() 829 // method. It's sent by transaction coordinators to let the system 830 // know that the transaction is still ongoing. Note that this 831 // heartbeat message is different from the heartbeat message in the 832 // gossip protocol. 833 message HeartbeatTxnRequest { 834 option (gogoproto.equal) = true; 835 836 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 837 util.hlc.Timestamp now = 2 [(gogoproto.nullable) = false]; 838 } 839 840 // A HeartbeatTxnResponse is the return value from the HeartbeatTxn() 841 // method. It returns the transaction info in the response header. The 842 // returned transaction lets the coordinator know the disposition of 843 // the transaction (i.e. aborted, committed, or pending). 844 message HeartbeatTxnResponse { 845 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 846 } 847 848 // A GCRequest is arguments to the GC() method. It's sent by range 849 // lease holders after scanning range data to find expired MVCC values. 850 message GCRequest { 851 option (gogoproto.equal) = true; 852 853 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 854 855 message GCKey { 856 option (gogoproto.equal) = true; 857 858 bytes key = 1 [(gogoproto.casttype) = "Key"]; 859 util.hlc.Timestamp timestamp = 2 [(gogoproto.nullable) = false]; 860 } 861 repeated GCKey keys = 3 [(gogoproto.nullable) = false]; 862 // Threshold is the expiration timestamp. 863 util.hlc.Timestamp threshold = 4 [(gogoproto.nullable) = false]; 864 865 reserved 5; 866 } 867 868 // A GCResponse is the return value from the GC() method. 869 message GCResponse { 870 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 871 } 872 873 // PushTxnType determines what action to take when pushing a transaction. 874 enum PushTxnType { 875 option (gogoproto.goproto_enum_prefix) = false; 876 877 // Push the timestamp forward if possible to accommodate a concurrent reader. 878 PUSH_TIMESTAMP = 0; 879 // Abort the transaction if possible to accommodate a concurrent writer. 880 PUSH_ABORT = 1; 881 // Abort the transaction if it's abandoned, but don't attempt to mutate it 882 // otherwise. 883 PUSH_TOUCH = 2; 884 885 reserved 3; 886 } 887 888 // A PushTxnRequest is arguments to the PushTxn() method. It's sent by 889 // readers or writers which have encountered an "intent" laid down by 890 // another transaction. The goal is to resolve the conflict. Note that 891 // args.Key should be set to the txn ID of args.PusheeTxn, not 892 // args.PusherTxn. This RPC is addressed to the range which owns the pushee's 893 // txn record. 894 // 895 // Resolution is trivial if the txn which owns the intent has either 896 // been committed or aborted already. Otherwise, the existing txn can 897 // either be aborted (for write/write conflicts), or its commit 898 // timestamp can be moved forward (for read/write conflicts). The 899 // course of action is determined by the specified push type, and by 900 // the owning txn's status and priority. 901 message PushTxnRequest { 902 option (gogoproto.equal) = true; 903 904 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 905 // Transaction which encountered the intent, if applicable. For a 906 // non-transactional pusher, pusher_txn will only have the priority set (in 907 // particular, ID won't be set). Used to compare priorities and timestamps if 908 // priorities are equal. 909 Transaction pusher_txn = 2 [(gogoproto.nullable) = false]; 910 // Transaction to be pushed, as specified at the intent which led to 911 // the push transaction request. Note that this may not be the most 912 // up-to-date value of the transaction record, but will be set or 913 // merged as appropriate. 914 storage.enginepb.TxnMeta pushee_txn = 3 [(gogoproto.nullable) = false]; 915 // PushTo is the timestamp which PusheeTxn should be pushed to. During 916 // conflict resolution, it should be set just after the timestamp of the 917 // conflicting read or write. 918 util.hlc.Timestamp push_to = 4 [(gogoproto.nullable) = false]; 919 // Readers set this to PUSH_TIMESTAMP to move pushee_txn's provisional 920 // commit timestamp forward. Writers set this to PUSH_ABORT to request 921 // that pushee_txn be aborted if possible. Inconsistent readers set 922 // this to PUSH_TOUCH to determine whether the pushee can be aborted 923 // due to inactivity (based on the now field). 924 PushTxnType push_type = 6; 925 // Forces the push by overriding the normal expiration and priority checks 926 // in PushTxn to either abort or push the timestamp. 927 bool force = 7; 928 929 reserved 5, 8, 9; 930 } 931 932 // A PushTxnResponse is the return value from the PushTxn() method. It 933 // returns success and the resulting state of PusheeTxn if the 934 // conflict was resolved in favor of the caller; the caller should 935 // subsequently invoke ResolveIntent() on the conflicted key. It 936 // returns an error otherwise. 937 message PushTxnResponse { 938 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 939 // pushee_txn is non-nil if the transaction was pushed and contains 940 // the current value of the transaction. 941 // TODO(tschottdorf): Maybe this can be a TxnMeta instead; probably requires 942 // factoring out the new Priority. 943 Transaction pushee_txn = 2 [(gogoproto.nullable) = false]; 944 } 945 946 // A RecoverTxnRequest is arguments to the RecoverTxn() method. It is sent 947 // during the recovery process for a transaction abandoned in the STAGING state. 948 // The sender is expected to have queried all of the abandoned transaction's 949 // in-flight writes and determined whether they all succeeded or not. This is 950 // used to determine whether the result of the recovery should be committing the 951 // abandoned transaction or aborting it. 952 message RecoverTxnRequest { 953 option (gogoproto.equal) = true; 954 955 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 956 // Transaction record to recover. 957 storage.enginepb.TxnMeta txn = 2 [(gogoproto.nullable) = false]; 958 // Did all of the STAGING transaction's writes succeed? If so, the transaction 959 // is implicitly committed and the commit can be made explicit by giving its 960 // record a COMMITTED status. If not, the transaction can be aborted as long 961 // as a write that was found to have failed was prevented from ever succeeding 962 // in the future. 963 bool implicitly_committed = 3; 964 } 965 966 // A RecoverTxnResponse is the return value from the RecoverTxn() method. 967 message RecoverTxnResponse { 968 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 969 970 // Contains the finalized state of the recovered transaction. 971 Transaction recovered_txn = 2 [(gogoproto.nullable) = false]; 972 } 973 974 // A QueryTxnResponse is arguments to the QueryTxn() method. It's sent 975 // by transactions which are waiting to push another transaction because 976 // of conflicting write intents to fetch updates to either the pusher's 977 // or the pushee's transaction records. 978 message QueryTxnRequest { 979 option (gogoproto.equal) = true; 980 981 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 982 // Transaction record to query. 983 storage.enginepb.TxnMeta txn = 2 [(gogoproto.nullable) = false]; 984 // If true, the query will not return until there are changes to either the 985 // transaction status or priority -OR- to the set of dependent transactions. 986 bool wait_for_update = 3; 987 // Set of known dependent transactions. 988 repeated bytes known_waiting_txns = 4 [(gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID"]; 989 } 990 991 // A QueryTxnResponse is the return value from the QueryTxn() method. 992 message QueryTxnResponse { 993 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 994 // Contains the current state of the queried transaction. If the queried 995 // transaction record does not exist, this will be empty. 996 Transaction queried_txn = 2 [(gogoproto.nullable) = false]; 997 // Specifies a list of transaction IDs which are waiting on the txn. 998 repeated bytes waiting_txns = 3 [(gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID"]; 999 } 1000 1001 // A QueryIntentRequest is arguments to the QueryIntent() method. It visits 1002 // the specified key and checks whether an intent is present for the given 1003 // transaction. If the intent is found to be missing then it is prevented 1004 // from ever being written in the future. 1005 message QueryIntentRequest { 1006 option (gogoproto.equal) = true; 1007 1008 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1009 1010 // The TxnMeta that the intent is expected to have. Specifically, whether an 1011 // intent is a match or not is defined as whether an intent exists that could 1012 // be committed by the provided transaction. If an intent is found at the 1013 // specified key, the intent is only considered a match if it has the same ID, 1014 // the same epoch, and a provisional commit timestamp that is equal to or less 1015 // than that in the provided transaction. The TxnMeta's provisional commit 1016 // timestamp is forwarded by the provisional commit timestamp of the request 1017 // header transaction if the transactions are the same (i.e. a transaction is 1018 // querying its own intent after successfully having refreshed). 1019 // 1020 // Additionally, the intent is only considered a match if its sequence number 1021 // is equal to or greater than the expected txn's sequence number. The 1022 // requests doesn't require an exact sequence number match because the 1023 // transaction could have performed overlapping writes, in which case only the 1024 // latest sequence number will remain. We assume that if a transaction has 1025 // successfully written an intent at a larger sequence number then it must 1026 // have succeeeded in writing an intent at the smaller sequence number as 1027 // well. 1028 storage.enginepb.TxnMeta txn = 2 [(gogoproto.nullable) = false]; 1029 1030 // If true, return an IntentMissingError if a matching intent is not found. 1031 // Special-cased to return a SERIALIZABLE retry error if a SERIALIZABLE 1032 // transaction queries its own intent and finds it has been pushed. 1033 bool error_if_missing = 3; 1034 } 1035 1036 // A QueryIntentResponse is the return value from the QueryIntent() method. 1037 message QueryIntentResponse { 1038 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1039 // Whether an intent matching the expected transaction was found at the key. 1040 bool found_intent = 2; 1041 } 1042 1043 // A ResolveIntentRequest is arguments to the ResolveIntent() 1044 // method. It is sent by transaction coordinators after success 1045 // calling PushTxn to clean up write intents: either to remove, commit 1046 // or move them forward in time. 1047 message ResolveIntentRequest { 1048 option (gogoproto.equal) = true; 1049 1050 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1051 // The transaction whose intent is being resolved. 1052 storage.enginepb.TxnMeta intent_txn = 2 [(gogoproto.nullable) = false]; 1053 // The status of the transaction. 1054 TransactionStatus status = 3; 1055 // Optionally poison the abort span for the transaction the intent's 1056 // range. 1057 bool poison = 4; 1058 // The list of ignored seqnum ranges as per the Transaction record. 1059 repeated storage.enginepb.IgnoredSeqNumRange ignored_seqnums = 5 [ 1060 (gogoproto.nullable) = false, 1061 (gogoproto.customname) = "IgnoredSeqNums" 1062 ]; 1063 } 1064 1065 // A ResolveIntentResponse is the return value from the 1066 // ResolveIntent() method. 1067 message ResolveIntentResponse { 1068 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1069 } 1070 1071 // A ResolveIntentRangeRequest is arguments to the ResolveIntentRange() method. 1072 // It is sent by transaction coordinators after success calling PushTxn to 1073 // clean up write intents: either to remove, commit or move them forward in 1074 // time. 1075 message ResolveIntentRangeRequest { 1076 option (gogoproto.equal) = true; 1077 1078 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1079 // The transaction whose intents are being resolved. 1080 storage.enginepb.TxnMeta intent_txn = 2 [(gogoproto.nullable) = false]; 1081 // The status of the transaction. 1082 TransactionStatus status = 3; 1083 // Optionally poison the abort span for the transaction on all ranges 1084 // on which the intents reside. 1085 bool poison = 4; 1086 // The minimum timestamp for any intents written by this 1087 // transaction. If present, this value can be used to optimize the 1088 // iteration over the span to find intents to resolve. 1089 util.hlc.Timestamp min_timestamp = 5 [(gogoproto.nullable) = false]; 1090 // The list of ignored seqnum ranges as per the Transaction record. 1091 repeated storage.enginepb.IgnoredSeqNumRange ignored_seqnums = 6 [ 1092 (gogoproto.nullable) = false, 1093 (gogoproto.customname) = "IgnoredSeqNums" 1094 ]; 1095 } 1096 1097 // A ResolveIntentRangeResponse is the return value from the 1098 // ResolveIntent() method. 1099 message ResolveIntentRangeResponse { 1100 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1101 } 1102 1103 // A MergeRequest contains arguments to the Merge() method. It 1104 // specifies a key and a value which should be merged into the 1105 // existing value at that key. 1106 message MergeRequest { 1107 option (gogoproto.equal) = true; 1108 1109 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1110 Value value = 2 [(gogoproto.nullable) = false]; 1111 } 1112 1113 // MergeResponse is the response to a Merge() operation. 1114 message MergeResponse { 1115 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1116 } 1117 1118 // TruncateLogRequest is used to remove a prefix of the raft log. While there 1119 // is no requirement for correctness that the raft log truncation be synchronized across 1120 // replicas, it is nice to preserve the property that all replicas of a range are as close 1121 // to identical as possible. The raft leader can also inform decisions about the cutoff point 1122 // with its knowledge of the replicas' acknowledgment status. 1123 message TruncateLogRequest { 1124 option (gogoproto.equal) = true; 1125 1126 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1127 1128 // Log entries < this index are to be discarded. 1129 uint64 index = 2; 1130 1131 // RangeID is used to double check that the correct range is being truncated. 1132 // The header specifies a span, start and end keys, but not the range id 1133 // itself. The range may have changed from the one specified in the header 1134 // in the case of a merge. 1135 int64 range_id = 3 [(gogoproto.customname) = "RangeID", (gogoproto.casttype) = "RangeID"]; 1136 } 1137 1138 // TruncateLogResponse is the response to a TruncateLog() operation. 1139 message TruncateLogResponse { 1140 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1141 } 1142 1143 // A RequestLeaseRequest is arguments to the RequestLease() 1144 // method. It is sent by the store on behalf of one of its ranges upon receipt 1145 // of a command requiring a lease when none is found. 1146 message RequestLeaseRequest { 1147 option (gogoproto.equal) = true; 1148 1149 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1150 Lease lease = 2 [(gogoproto.nullable) = false]; 1151 // The previous lease is specified by the caller to verify 1152 // it has not changed when executing this command. 1153 Lease prev_lease = 3 [(gogoproto.nullable) = false]; 1154 // The MinLeaseProposedTS of the proposing replica to make sure that leases 1155 // issued after a node restart receive a new sequence number (instead of 1156 // counting as a lease extension). See #23204. 1157 util.hlc.Timestamp min_proposed_ts = 4 [(gogoproto.customname) = "MinProposedTS"]; 1158 } 1159 1160 // A TransferLeaseRequest represents the arguments to the TransferLease() 1161 // method. It is sent by a replica that currently holds the range lease and 1162 // wants to transfer it away. 1163 // 1164 // Like a RequestLeaseRequest, this request has the effect of instituting a new 1165 // lease. The difference is that the new lease is allowed to overlap the 1166 // existing one. It is a separate request because the RequestLeaseRequest is 1167 // special - it's not subject to the same replay protection restrictions as 1168 // other requests, instead being protected from replays by the fact that leases 1169 // are not generally allowed to overlap. The TransferLeaseRequest is not 1170 // special in this respect (for example, the proposer of this command is 1171 // checked to have been holding the lease when the proposal was made). 1172 message TransferLeaseRequest { 1173 option (gogoproto.equal) = true; 1174 1175 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1176 Lease lease = 2 [(gogoproto.nullable) = false]; 1177 // The previous lease is specified by the caller to verify 1178 // it has not changed when executing this command. 1179 Lease prev_lease = 3 [(gogoproto.nullable) = false]; 1180 } 1181 1182 // LeaseInfoRequest is the argument to the LeaseInfo() method, for getting 1183 // information about a range's lease. 1184 // It's a point request, so it addresses one single range, and returns the lease 1185 // currently in effect for that range. 1186 message LeaseInfoRequest{ 1187 option (gogoproto.equal) = true; 1188 1189 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1190 } 1191 // LeaseInfoResponse is the response to a LeaseInfo() operation. 1192 message LeaseInfoResponse{ 1193 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1194 // The last lease known by the replica serving the request. It can also be the 1195 // tentative future lease, if a lease transfer is in progress. 1196 Lease lease = 2 [(gogoproto.nullable) = false]; 1197 } 1198 1199 // A RequestLeaseResponse is the response to a RequestLease() or TransferLease() 1200 // operation. 1201 message RequestLeaseResponse{ 1202 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1203 } 1204 1205 // A ComputeChecksumRequest is arguments to the ComputeChecksum() method, to 1206 // start computing the checksum for the specified range at the snapshot for this 1207 // request command. A response is returned without the checksum. The computed 1208 // checksum is retrieved via a storage.CollectChecksumRequest. 1209 message ComputeChecksumRequest { 1210 option (gogoproto.equal) = true; 1211 1212 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1213 // The version used to pick the checksum method. It allows us to use a 1214 // consistent checksumming method across replicas. 1215 uint32 version = 2; 1216 reserved 3; 1217 // Compute a checksum along with a snapshot of the entire range, that will be 1218 // used in logging a diff during checksum verification. 1219 bool snapshot = 4; 1220 // The type of checksum to compute. See ChecksumMode. 1221 ChecksumMode mode = 5; 1222 // If set, a checkpoint (i.e. cheap backup) of the engine will be taken. This 1223 // is expected to be set only if we already know that there is a problem and 1224 // we want to preserve as much state as possible. The checkpoint will be stored 1225 // in the engine's auxiliary directory. 1226 bool checkpoint = 6; 1227 // If non-empty, specifies the replicas which are the most likely source of the 1228 // inconsistency. After evaluating the command, these replicas will terminate. 1229 // 1230 // See the field of the same name in CheckConsistencyRequest for details. 1231 repeated ReplicaDescriptor terminate = 7 [(gogoproto.nullable) = false]; 1232 } 1233 1234 // A ComputeChecksumResponse is the response to a ComputeChecksum() operation. 1235 message ComputeChecksumResponse { 1236 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1237 1238 // ChecksumID is the unique identifier that can be used to get the computed 1239 // checksum in a future storage.CollectChecksumRequest. 1240 bytes checksum_id = 2 [(gogoproto.nullable) = false, 1241 (gogoproto.customname) = "ChecksumID", 1242 (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID"]; 1243 } 1244 1245 enum ExternalStorageProvider { 1246 Unknown = 0; 1247 LocalFile = 1; 1248 Http = 2; 1249 S3 = 3; 1250 GoogleCloud = 4; 1251 Azure = 5; 1252 Workload = 6; 1253 } 1254 1255 message ExternalStorage { 1256 option (gogoproto.equal) = true; 1257 1258 ExternalStorageProvider provider = 1; 1259 1260 message LocalFilePath { 1261 option (gogoproto.equal) = true; 1262 1263 string path = 1; 1264 uint32 node_id = 2 [(gogoproto.customname) = "NodeID", (gogoproto.casttype) = "NodeID"]; 1265 } 1266 message Http { 1267 option (gogoproto.equal) = true; 1268 1269 string baseUri = 1; 1270 } 1271 message S3 { 1272 option (gogoproto.equal) = true; 1273 1274 string bucket = 1; 1275 string prefix = 2; 1276 1277 string access_key = 3; 1278 string secret = 4; 1279 string temp_token = 5; 1280 string endpoint = 6; 1281 string region = 7; 1282 string auth = 8; 1283 } 1284 message GCS { 1285 option (gogoproto.equal) = true; 1286 1287 string bucket = 1; 1288 string prefix = 2; 1289 string auth = 3; 1290 1291 // BillingProject if non-empty, is the Google Cloud project to bill for all storage requests. 1292 // This is required to be set if using a "requestor pays" bucket. 1293 string billing_project = 4; 1294 1295 string credentials = 5; 1296 } 1297 message Azure { 1298 option (gogoproto.equal) = true; 1299 1300 string container = 1; 1301 string prefix = 2; 1302 1303 string account_name = 3; 1304 string account_key = 4; 1305 } 1306 message Workload { 1307 option (gogoproto.equal) = true; 1308 1309 string generator = 1; 1310 string version = 2; 1311 string table = 3; 1312 repeated string flags = 4; 1313 string format = 5; 1314 int64 batch_begin = 6; 1315 int64 batch_end = 7; 1316 } 1317 LocalFilePath LocalFile = 2 [(gogoproto.nullable) = false]; 1318 Http HttpPath = 3 [(gogoproto.nullable) = false]; 1319 GCS GoogleCloudConfig = 4; 1320 S3 S3Config = 5; 1321 Azure AzureConfig = 6; 1322 Workload WorkloadConfig = 7; 1323 } 1324 1325 // WriteBatchRequest is arguments to the WriteBatch() method, to apply the 1326 // operations encoded in a BatchRepr. 1327 message WriteBatchRequest { 1328 option (gogoproto.equal) = true; 1329 1330 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1331 // The span of keys encoded in data, duplicated because the header span can 1332 // be modified by DistSender and we use this one to fail fast. 1333 Span data_span = 2 [(gogoproto.nullable) = false]; 1334 // A BatchRepr, the serialized form of a RocksDB Batch. 1335 bytes data = 3; 1336 } 1337 1338 // WriteBatchResponse is the response to a WriteBatch() operation. 1339 message WriteBatchResponse { 1340 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1341 } 1342 1343 enum MVCCFilter { 1344 Latest = 0; 1345 All = 1; 1346 } 1347 1348 message FileEncryptionOptions { 1349 option (gogoproto.equal) = true; 1350 // Key specifies the key to use for encryption or decryption. 1351 bytes key = 1; 1352 } 1353 1354 // ExportRequest is the argument to the Export() method, to dump a keyrange into 1355 // files under a basepath. 1356 message ExportRequest { 1357 option (gogoproto.equal) = true; 1358 1359 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1360 ExternalStorage storage = 2 [(gogoproto.nullable) = false]; 1361 util.hlc.Timestamp start_time = 3 [(gogoproto.nullable) = false]; 1362 MVCCFilter mvcc_filter = 4 [(gogoproto.customname) = "MVCCFilter"]; 1363 1364 // Return the exported SST data in the response. 1365 bool return_sst = 5 [(gogoproto.customname) = "ReturnSST"]; 1366 // OmitChecksum, if true, will skip checksumming the sst and leave the 1367 // `Sha512` field empty in the response. During a rolling upgrade to 2.1, it 1368 // may still be set if the request is served by an old node, but since the 1369 // caller has declare they're not going to use it, that's okay. 1370 bool omit_checksum = 6; 1371 1372 // EnableTimeBoundIteratorOptimization, if true, enables a performance 1373 // optimization that allows us to entirely skip over sstables in RocksDB that 1374 // don't have data relevant to the time bounds in this request. 1375 // 1376 // This can have a dramatic impact on performance, but we've seen a number of 1377 // extremely subtle and hard to detect correctness issues with this (see 1378 // #28358 #34819). As a result, we've decided to skip the optimization 1379 // everywhere that it isn't absolutely necessary for the feature to work 1380 // (leaving one place: poller-based changefeeds, which are being phased out 1381 // anyway). This will both give increased confidence in correctness as well as 1382 // eliminate any need to investigate time-bound iterators when/if someone hits 1383 // a correctness bug. 1384 bool enable_time_bound_iterator_optimization = 7; 1385 // StorageByLocalityKV is a map of locality KVs to storage configurations. If 1386 // set, files will be written to the store that matches the most specific 1387 // locality KV in the map. 1388 map<string, ExternalStorage> storage_by_locality_kv = 8 [(gogoproto.customname) = "StorageByLocalityKV"]; 1389 1390 FileEncryptionOptions encryption = 9; 1391 1392 // TargetFileSize is the byte size target for individual files in the 1393 // response. If the MVCCFilter is Latest, the returned files will only be 1394 // larger than this value if an individual KV pair is larger than this value. 1395 // If the MVCCFilter is All then the file may exceed this value by at most the 1396 // size of all versions of a single key. If TargetFileSize is non-positive 1397 // then there is no limit. 1398 int64 target_file_size = 10; 1399 } 1400 1401 // BulkOpSummary summarizes the data processed by an operation, counting the 1402 // total size as well as number of entries processed in each index (from which 1403 // row counts can be derived). 1404 message BulkOpSummary { 1405 // DataSize is the sum of key and value lengths. 1406 int64 data_size = 1; 1407 // DeprecatedRows contained the row count when "rows" were always defined as 1408 // entries in the index with ID 1, however since 20.1 and the introduction of 1409 // PK changes, the low-level counters that produce BulkOpSummaries are unable 1410 // to assume which index is primary and thus cannot distinguish "rows" vs 1411 // "index entries". Callers wishing to get a "row count" from a BulkOpSummary 1412 // should use EntryCounts instead, fetching the count for the table/index that 1413 // corresponds to the PK. 1414 int64 deprecated_rows = 2; 1415 // DeprecatedIndexEntries contained the index entry count prior to 20.1. See 1416 // the comment on DeprecatedRows for details. 1417 int64 deprecated_index_entries = 3; 1418 1419 reserved 4; 1420 // EntryCounts contains the number of keys processed for each tableID/indexID 1421 // pair, stored under the key (tableID << 32) | indexID. This EntryCount key 1422 // generation logic is also available in the BulkOpSummaryID helper. 1423 map<uint64, int64> entry_counts = 5; 1424 } 1425 1426 // ExportResponse is the response to an Export() operation. 1427 message ExportResponse { 1428 // File describes a keyrange that has been dumped to a file at the given 1429 // path. 1430 message File { 1431 Span span = 1 [(gogoproto.nullable) = false]; 1432 string path = 2; 1433 reserved 3; 1434 reserved 4; 1435 bytes sha512 = 5; 1436 1437 BulkOpSummary exported = 6 [(gogoproto.nullable) = false]; 1438 1439 bytes sst = 7 [(gogoproto.customname) = "SST"]; 1440 string locality_kv = 8 [(gogoproto.customname) = "LocalityKV"]; 1441 } 1442 1443 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1444 repeated File files = 2 [(gogoproto.nullable) = false]; 1445 util.hlc.Timestamp start_time = 3 [(gogoproto.nullable) = false]; 1446 } 1447 1448 // ImportRequest is the argument to the Import() method, to bulk load key/value 1449 // entries. 1450 message ImportRequest { 1451 option (gogoproto.equal) = true; 1452 1453 message File { 1454 option (gogoproto.equal) = true; 1455 1456 ExternalStorage dir = 1 [(gogoproto.nullable) = false]; 1457 string path = 2; 1458 reserved 3; 1459 bytes sha512 = 4; 1460 } 1461 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1462 // Files contains an ordered list of files, each containing kv entries to 1463 // import. Entries in later files with the same key override earlier ones. 1464 repeated File files = 2 [(gogoproto.nullable) = false]; 1465 // DataSpan is the pre-rewrite keyrange of the data in `Files`. 1466 Span data_span = 3 [(gogoproto.nullable) = false]; 1467 // EndTime, if not the zero value, will cause only entries before it to be 1468 // imported. 1469 util.hlc.Timestamp end_time = 6 [(gogoproto.nullable) = false]; 1470 reserved 4; 1471 message TableRekey { 1472 option (gogoproto.equal) = true; 1473 1474 // OldID is the previous ID of `new_desc`. 1475 uint32 old_id = 1 [(gogoproto.customname) = "OldID"]; 1476 // NewDesc is an encoded Descriptor message. 1477 bytes new_desc = 2; 1478 } 1479 // Rekeys contains the descriptors for the data being Imported and the 1480 // previous ID for each (which is the ID used in the source data pointed to by 1481 // `files`). 1482 // TODO(dan): This field is a superset of the information represented by 1483 // `key_rewrites` and will supercede it once rekeying of interleaved tables is 1484 // fixed. 1485 repeated TableRekey rekeys = 5 [(gogoproto.nullable) = false]; 1486 1487 FileEncryptionOptions encryption = 7; 1488 } 1489 1490 // ImportResponse is the response to a Import() operation. 1491 message ImportResponse { 1492 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1493 reserved 2; 1494 BulkOpSummary imported = 3 [(gogoproto.nullable) = false]; 1495 } 1496 1497 // AdminScatterRequest is the argument to the AdminScatter() method, which moves 1498 // replicas and leaseholders for a selection of ranges. Scatter is best-effort; 1499 // ranges that cannot be moved will include an error detail in the response and 1500 // won't fail the request. 1501 message AdminScatterRequest { 1502 option (gogoproto.equal) = true; 1503 1504 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1505 bool randomize_leases = 2; 1506 } 1507 1508 // ScatterResponse is the response to a Scatter() operation. 1509 message AdminScatterResponse { 1510 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1511 1512 message Range { 1513 Span span = 1 [(gogoproto.nullable) = false]; 1514 reserved 2; 1515 } 1516 repeated Range ranges = 2 [(gogoproto.nullable) = false]; 1517 } 1518 1519 // AdminVerifyProtectedTimestampRequest is the argument to the 1520 // AdminVerifyProtectedTimestamp method which ensures that the specified record 1521 // will be seen before data can be garbage collected at the timestamp. 1522 message AdminVerifyProtectedTimestampRequest { 1523 option (gogoproto.equal) = true; 1524 1525 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1526 1527 // RecordID is the ID of the protected timestamp Record being verified. 1528 bytes record_id = 4 [ 1529 (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID", 1530 (gogoproto.nullable) = false, 1531 (gogoproto.customname) = "RecordID" 1532 ]; 1533 1534 // Protected is the timestamp at which the record with RecordID protects. 1535 util.hlc.Timestamp protected = 2 [(gogoproto.nullable) = false]; 1536 1537 // RecordAliveAt is a an hlc timestamp at which the record being verified is 1538 // known to exist. A value for RecordAliveAt is generally determined by 1539 // reading a Record from the database and using the timestamp at which that 1540 // read occurred. 1541 util.hlc.Timestamp record_alive_at = 3 [(gogoproto.nullable) = false]; 1542 } 1543 1544 1545 // AdminVerifyProtectedTimestampResponse is the argument to the 1546 // AdminVerifyProtectedTimestamp method which ensures that the specified record 1547 // will be seen before data can be garbage collected at the timestamp. 1548 message AdminVerifyProtectedTimestampResponse { 1549 1550 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1551 bool verified = 2; 1552 1553 repeated RangeDescriptor failed_ranges = 3 [(gogoproto.nullable) = false]; 1554 } 1555 1556 // AddSSTableRequest is arguments to the AddSSTable() method, to link a file 1557 // into the RocksDB log-structured merge-tree. 1558 message AddSSTableRequest { 1559 option (gogoproto.equal) = true; 1560 1561 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1562 bytes data = 2; 1563 // If set, indicates that AddSSTable will not allow ingestion of keys which 1564 // shadow already existing key entries. This disallows any key slice overlap 1565 // regardless of the timestamps. 1566 bool disallow_shadowing = 3; 1567 // MVCCStats, if set, is the MVCCStats for the contents of this SSTable and is 1568 // used as-is during evaluation of the AddSSTable command to update the range 1569 // MVCCStats, instead of computing the stats for the SSTable by iterating it. 1570 // Including these stats can make the evaluation of AddSSTable much cheaper. 1571 storage.enginepb.MVCCStats mvcc_stats = 4 [(gogoproto.customname) = "MVCCStats"]; 1572 1573 // IngestAsWrites causes the content of the provided SSTable to be ingested in 1574 // a regular WriteBatch, instead of directly adding the provided SST to the 1575 // storage engine. This is useful if the data size is so small that the fixed 1576 // costs of adding an extra file (file IO, triggering a flush, compactions) 1577 // would be higher than the marginal costs of the amount of data going though 1578 // the usual write pipeline (on-disk raft log, WAL, etc). 1579 // TODO(dt): https://github.com/cockroachdb/cockroach/issues/34579#issuecomment-544627193 1580 bool ingest_as_writes = 5; 1581 } 1582 1583 // AddSSTableResponse is the response to a AddSSTable() operation. 1584 message AddSSTableResponse { 1585 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1586 } 1587 1588 // RefreshRequest is arguments to the Refresh() method, which verifies that no 1589 // write has occurred since the refresh_from timestamp to the specified key. 1590 // The timestamp cache is updated. A transaction must be supplied with this 1591 // request. If the key has been written more recently than the provided txn 1592 // timestamp, an error is returned and the timestamp cache is not updated. 1593 // 1594 // The timestamp cache is updated to txn.read_timestamp, like it is for all 1595 // requests. 1596 message RefreshRequest { 1597 option (gogoproto.equal) = true; 1598 1599 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1600 reserved 2; 1601 1602 // refresh_from specifies the lower-bound of the verification. The request verifies that 1603 // there's no write in the range [refresh_from, txn.read_timestamp]. 1604 util.hlc.Timestamp refresh_from = 3 [(gogoproto.nullable) = false]; 1605 } 1606 1607 // RefreshResponse is the response to a Refresh() operation. 1608 message RefreshResponse { 1609 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1610 } 1611 1612 // RefreshRangeRequest is arguments to the RefreshRange() method, which 1613 // is similar to RefreshRequest (see comments above), but operates on 1614 // a key span instead of a single key. 1615 message RefreshRangeRequest { 1616 option (gogoproto.equal) = true; 1617 1618 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1619 reserved 2; 1620 1621 // refresh_from specifies the lower-bound of the verification. The request verifies that 1622 // there's no write in the range [refresh_from, txn.read_timestamp]. 1623 util.hlc.Timestamp refresh_from = 3 [(gogoproto.nullable) = false]; 1624 } 1625 1626 // RefreshRangeResponse is the response to a RefreshRange() operation. 1627 message RefreshRangeResponse { 1628 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1629 } 1630 1631 // SubsumeRequest is the argument to the Subsume() method, which freezes a range 1632 // for merging with its left-hand neighbor. 1633 // 1634 // Subsume, when called correctly, provides important guarantees that ensure 1635 // there is no moment in time where the ranges involved in the merge could both 1636 // process commands for the same keys. See the comment on Subsume for details. 1637 // 1638 // Subsume may return stale MVCC statistics when used outside of a merge 1639 // transaction. As a rule of thumb, it is incorrect to call Subsume, except from 1640 // its carefully-chosen location within a merge transaction. 1641 message SubsumeRequest { 1642 option (gogoproto.equal) = true; 1643 1644 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1645 1646 // The range descriptor for the left-hand side of the merge. Used by the 1647 // right-hand side to sanity-check the validity of the merge. 1648 RangeDescriptor left_desc = 2 [(gogoproto.nullable) = false]; 1649 // The range descriptor for the right-hand side of the merge. Should match 1650 // the range descriptor of the range evaluating this request. 1651 RangeDescriptor right_desc = 3 [(gogoproto.nullable) = false]; 1652 } 1653 1654 // SubsumeResponse is the response to a SubsumeRequest. 1655 message SubsumeResponse { 1656 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1657 1658 reserved 2; 1659 1660 // MVCCStats are the MVCC statistics for the range. 1661 storage.enginepb.MVCCStats mvcc_stats = 3 [ 1662 (gogoproto.nullable) = false, 1663 (gogoproto.customname) = "MVCCStats" 1664 ]; 1665 1666 // LeaseAppliedIndex is the lease applied index of the last applied command 1667 // at the time that the Subsume request executed. This is NOT intended to be 1668 // the lease index of the SubsumeRequest itself. Instead, it is intended to 1669 // provide the sender of the Subsume request with an upper bound on the lease 1670 // applied index of the CPut that left an intent on the local copy of the 1671 // right-hand range descriptor. 1672 uint64 lease_applied_index = 4; 1673 1674 // FreezeStart is a timestamp that is guaranteed to be greater than the 1675 // timestamps at which any requests were serviced by the responding replica 1676 // before it stopped responding to requests altogether (in anticipation of 1677 // being subsumed). It is suitable for use as the timestamp cache's low water 1678 // mark for the keys previously owned by the subsumed range. 1679 util.hlc.Timestamp freeze_start = 5 [(gogoproto.nullable) = false]; 1680 } 1681 1682 // RangeStatsRequest is the argument to the RangeStats() method. It requests the 1683 // MVCC statistics of the receiving range. 1684 message RangeStatsRequest { 1685 option (gogoproto.equal) = true; 1686 1687 RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1688 } 1689 1690 // RangeStatsResponse is the response to a RangeStatsRequest. 1691 message RangeStatsResponse { 1692 ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1693 1694 // MVCCStats are the MVCC statistics for the range that processed the 1695 // request. 1696 storage.enginepb.MVCCStats mvcc_stats = 2 [ 1697 (gogoproto.nullable) = false, 1698 (gogoproto.customname) = "MVCCStats" 1699 ]; 1700 1701 // QueriesPerSecond is the rate of request/s or QPS for the range. 1702 double queries_per_second = 3; 1703 } 1704 1705 // A RequestUnion contains exactly one of the requests. 1706 // The values added here must match those in ResponseUnion. 1707 // 1708 // Be cautious about deprecating fields as doing so can lead to inconsistencies 1709 // between replicas. 1710 message RequestUnion { 1711 oneof value { 1712 GetRequest get = 1; 1713 PutRequest put = 2; 1714 ConditionalPutRequest conditional_put = 3; 1715 IncrementRequest increment = 4; 1716 DeleteRequest delete = 5; 1717 DeleteRangeRequest delete_range = 6; 1718 ClearRangeRequest clear_range = 38; 1719 RevertRangeRequest revert_range = 48; 1720 ScanRequest scan = 7; 1721 EndTxnRequest end_txn = 9; 1722 AdminSplitRequest admin_split = 10; 1723 AdminUnsplitRequest admin_unsplit = 47; 1724 AdminMergeRequest admin_merge = 11; 1725 AdminTransferLeaseRequest admin_transfer_lease = 29; 1726 AdminChangeReplicasRequest admin_change_replicas = 35; 1727 AdminRelocateRangeRequest admin_relocate_range = 45; 1728 HeartbeatTxnRequest heartbeat_txn = 12; 1729 GCRequest gc = 13; 1730 PushTxnRequest push_txn = 14; 1731 RecoverTxnRequest recover_txn = 46; 1732 ResolveIntentRequest resolve_intent = 16; 1733 ResolveIntentRangeRequest resolve_intent_range = 17; 1734 MergeRequest merge = 18; 1735 TruncateLogRequest truncate_log = 19; 1736 RequestLeaseRequest request_lease = 20; 1737 ReverseScanRequest reverse_scan = 21; 1738 ComputeChecksumRequest compute_checksum = 22; 1739 CheckConsistencyRequest check_consistency = 24; 1740 InitPutRequest init_put = 26; 1741 TransferLeaseRequest transfer_lease = 28; 1742 LeaseInfoRequest lease_info = 30; 1743 WriteBatchRequest write_batch = 31; 1744 ExportRequest export = 32; 1745 ImportRequest import = 34; 1746 QueryTxnRequest query_txn = 33; 1747 QueryIntentRequest query_intent = 42; 1748 AdminScatterRequest admin_scatter = 36; 1749 AddSSTableRequest add_sstable = 37; 1750 RecomputeStatsRequest recompute_stats = 39; 1751 RefreshRequest refresh = 40; 1752 RefreshRangeRequest refresh_range = 41; 1753 SubsumeRequest subsume = 43; 1754 RangeStatsRequest range_stats = 44; 1755 AdminVerifyProtectedTimestampRequest admin_verify_protected_timestamp = 49; 1756 } 1757 reserved 8, 15, 23, 25, 27; 1758 } 1759 1760 // A ResponseUnion contains exactly one of the responses. 1761 // The values added here must match those in RequestUnion. 1762 message ResponseUnion { 1763 oneof value { 1764 GetResponse get = 1; 1765 PutResponse put = 2; 1766 ConditionalPutResponse conditional_put = 3; 1767 IncrementResponse increment = 4; 1768 DeleteResponse delete = 5; 1769 DeleteRangeResponse delete_range = 6; 1770 ClearRangeResponse clear_range = 38; 1771 RevertRangeResponse revert_range = 48; 1772 ScanResponse scan = 7; 1773 EndTxnResponse end_txn = 9; 1774 AdminSplitResponse admin_split = 10; 1775 AdminUnsplitResponse admin_unsplit = 47; 1776 AdminMergeResponse admin_merge = 11; 1777 AdminTransferLeaseResponse admin_transfer_lease = 29; 1778 AdminChangeReplicasResponse admin_change_replicas = 35; 1779 AdminRelocateRangeResponse admin_relocate_range = 45; 1780 HeartbeatTxnResponse heartbeat_txn = 12; 1781 GCResponse gc = 13; 1782 PushTxnResponse push_txn = 14; 1783 RecoverTxnResponse recover_txn = 46; 1784 ResolveIntentResponse resolve_intent = 16; 1785 ResolveIntentRangeResponse resolve_intent_range = 17; 1786 MergeResponse merge = 18; 1787 TruncateLogResponse truncate_log = 19; 1788 RequestLeaseResponse request_lease = 20; 1789 ReverseScanResponse reverse_scan = 21; 1790 ComputeChecksumResponse compute_checksum = 22; 1791 CheckConsistencyResponse check_consistency = 24; 1792 InitPutResponse init_put = 26; 1793 LeaseInfoResponse lease_info = 30; 1794 WriteBatchResponse write_batch = 31; 1795 ExportResponse export = 32; 1796 ImportResponse import = 34; 1797 QueryTxnResponse query_txn = 33; 1798 QueryIntentResponse query_intent = 42; 1799 AdminScatterResponse admin_scatter = 36; 1800 AddSSTableResponse add_sstable = 37; 1801 RecomputeStatsResponse recompute_stats = 39; 1802 RefreshResponse refresh = 40; 1803 RefreshRangeResponse refresh_range = 41; 1804 SubsumeResponse subsume = 43; 1805 RangeStatsResponse range_stats = 44; 1806 AdminVerifyProtectedTimestampResponse admin_verify_protected_timestamp = 49; 1807 } 1808 reserved 8, 15, 23, 25, 27, 28; 1809 } 1810 1811 // A Header is attached to a BatchRequest, encapsulating routing and auxiliary 1812 // information required for executing it. 1813 message Header { 1814 // timestamp specifies time at which reads or writes should be performed. If 1815 // the timestamp is set to zero value, its value is initialized to the wall 1816 // time of the server node. 1817 // 1818 // Transactional requests are not allowed to set this field; they must rely on 1819 // the server to set it from txn.ReadTimestamp. Also, for transactional 1820 // requests, writes are performed at the provisional commit timestamp 1821 // (txn.WriteTimestamp). 1822 util.hlc.Timestamp timestamp = 1 [(gogoproto.nullable) = false]; 1823 // replica specifies the destination of the request. 1824 ReplicaDescriptor replica = 2 [(gogoproto.nullable) = false]; 1825 // range_id specifies the ID of the Raft consensus group which the key 1826 // range belongs to. This is used by the receiving node to route the 1827 // request to the correct range. 1828 int64 range_id = 3 [(gogoproto.customname) = "RangeID", (gogoproto.casttype) = "RangeID"]; 1829 // user_priority allows any command's priority to be biased from the 1830 // default random priority. It specifies a multiple. If set to 0.5, 1831 // the chosen priority will be 1/2x as likely to beat any default 1832 // random priority. If set to 1, a default random priority is 1833 // chosen. If set to 2, the chosen priority will be 2x as likely to 1834 // beat any default random priority, and so on. As a special case, 0 1835 // priority is treated the same as 1. This value is ignored if txn 1836 // is specified. The min and max user priorities are set via 1837 // MinUserPriority and MaxUserPriority in data.go. 1838 double user_priority = 4 [(gogoproto.casttype) = "UserPriority"]; 1839 // txn is set non-nil if a transaction is underway. To start a txn, 1840 // the first request should set this field to non-nil with name and 1841 // isolation level set as desired. The response will contain the 1842 // fully-initialized transaction with txn ID, priority, initial 1843 // timestamp, and maximum timestamp. 1844 Transaction txn = 5; 1845 // read_consistency specifies the consistency for read 1846 // operations. The default is CONSISTENT. This value is ignored for 1847 // write operations. 1848 ReadConsistencyType read_consistency = 6; 1849 // If set to a non-zero value, the total number of keys touched by requests in 1850 // the batch is limited. A resume span will be provided on the response of the 1851 // requests that were not able to run to completion before the limit was 1852 // reached. 1853 // 1854 // Overlapping requests 1855 // 1856 // The spans accessed by the requests are allowed to overlap. However, if any 1857 // requests overlap, the caller must be prepared to handle *multiple* partial 1858 // responses in the corresponding BatchResponse. If no requests overlap, then 1859 // only up to one request will return a partial result. Additionally, if two 1860 // requests touch the same key, it is double counted towards the key limit. 1861 // 1862 // Unordered requests 1863 // 1864 // The spans accessed by requests do not need to be in sorted order. However, 1865 // if the requests are not in sorted order (e.g. increasing key order for 1866 // Scans and other forward requests, decreasing key order for ReverseScans), 1867 // the caller must be prepared to handle empty responses interleaved with full 1868 // responses and one (or more, see "Overlapping requests") partial response 1869 // in the corresponding BatchResponse. If the requests are in sorted order, 1870 // the caller can expect to receive a group of full responses, one (or more) 1871 // partial responses, and a group of empty responses. 1872 // 1873 // Pagination of requests 1874 // 1875 // As discussed above, overlapping requests or unordered requests in batches 1876 // with a limit can lead to response batches with multiple partial responses. 1877 // In practice, this is because DistSender paginates request evaluation over 1878 // ranges in increasing key order (decreasing for reverse batches). As ranges 1879 // are iterated over in order, all requests that target a given range are sent 1880 // to it, regardless of their position in the batch. Once split and delivered 1881 // to a range, the applicable requests are executed in-full according to their 1882 // order in the batch. 1883 // 1884 // This behavior makes it difficult to make assumptions about the resume spans 1885 // of individual responses in batches that contain either overlapping requests 1886 // or unordered requests. As such, clients should not make assumptions about 1887 // resume spans and should instead inspect the result for every request in the 1888 // batch if if cannot guarantee that the batch is ordered with no overlapping 1889 // requests. 1890 // 1891 // Supported requests 1892 // 1893 // If a limit is provided, the batch can contain only the following range 1894 // request types: 1895 // - ScanRequest 1896 // - ReverseScanRequest 1897 // - DeleteRangeRequest 1898 // - RevertRangeRequest 1899 // - ResolveIntentRangeRequest 1900 // 1901 // The following two requests types are also allowed in the batch, although 1902 // the limit has no effect on them: 1903 // - QueryIntentRequest 1904 // - EndTxnRequest 1905 // 1906 // Forward requests and reverse requests cannot be mixed in the same batch if 1907 // a limit is set. There doesn't seem to be a fundamental reason for this 1908 // restriction, but a batch that mixed forward and reverse requests would be 1909 // impossible to order, so it would unavoidably have to deal with the added 1910 // complications discussed in "Unordered requests". For now, that's a good 1911 // enough reason to disallow such batches. 1912 int64 max_span_request_keys = 8; 1913 // If set to a non-zero value, sets a target (in bytes) for how large the 1914 // response may grow. This is only supported for (forward and reverse) scans 1915 // and limits the number of rows scanned (and returned). The target will be 1916 // overshot; in particular, at least one row will always be returned (assuming 1917 // one exists). A suitable resume span will be returned. 1918 // 1919 // The semantics around overlapping requests, unordered requests, and 1920 // supported requests from max_span_request_keys apply to the target_bytes 1921 // option as well. 1922 int64 target_bytes = 15; 1923 // If set, all of the spans in the batch are distinct. Note that the 1924 // calculation of distinct spans does not include intents in an 1925 // EndTxnRequest. Currently set conservatively: a request might be 1926 // composed of distinct spans yet have this field set to false. 1927 bool distinct_spans = 9; 1928 // If set, return_range_info causes RangeInfo details to be returned with 1929 // each ResponseHeader. 1930 bool return_range_info = 10; 1931 // gateway_node_id is the ID of the gateway node where the request originated. 1932 int32 gateway_node_id = 11 [(gogoproto.customname) = "GatewayNodeID", (gogoproto.casttype) = "NodeID"]; 1933 // If set, the request will return to the client before proposing the 1934 // request into Raft. All consensus processing will be performed 1935 // asynchronously. Because consensus may fail, this means that the 1936 // request cannot be expected to succeed. Instead, its success must 1937 // be verified. 1938 // TODO(nvanbenschoten): Handling cases where consensus fails would 1939 // be much more straightforward if all transactional requests were 1940 // idempotent. We could just re-issue requests. See #26915. 1941 bool async_consensus = 13; 1942 // can_forward_read_timestamp indicates that the batch can be evaluated at a 1943 // higher timestamp than the transaction's read timestamp. The flag is only 1944 // applicable to transactional batches and is assumed to be true for all 1945 // non-transactional batches. It is set by the client if the transaction 1946 // has not performed any reads that must be refreshed prior to sending this 1947 // current batch. When set, it allows the server to handle pushes and write 1948 // too old conditions locally. 1949 // 1950 // NOTE: this flag is a generalization of EndTxn.CanCommitAtHigherTimestamp. 1951 // That flag should be deprecated in favor of this one. 1952 // TODO(nvanbenschoten): perform this migration. 1953 bool can_forward_read_timestamp = 16; 1954 reserved 7, 12, 14; 1955 } 1956 1957 1958 // A BatchRequest contains one or more requests to be executed in 1959 // parallel, or if applicable (based on write-only commands and 1960 // range-locality), as a single update. 1961 message BatchRequest { 1962 option (gogoproto.goproto_stringer) = false; 1963 1964 Header header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 1965 repeated RequestUnion requests = 2 [(gogoproto.nullable) = false]; 1966 } 1967 1968 // A BatchResponse contains one or more responses, one per request 1969 // corresponding to the requests in the matching BatchRequest. The 1970 // error in the response header is set to the first error from the 1971 // slice of responses, if applicable. 1972 message BatchResponse { 1973 option (gogoproto.goproto_stringer) = false; 1974 1975 message Header { 1976 reserved 4; 1977 // error communicates a structured error (i.e. one originating from a Node) 1978 // while the BatchResponse is sent over the network. If the code were 1979 // written today, the RPC endpoint would return a message containing both a 1980 // BatchResponse and an Error, and this embedding would go away. However, it 1981 // returns only a BatchResponse, and so the Error needs to be tucked away 1982 // somewhere (the structured error cannot be communicated via an RPC-level 1983 // error). 1984 // 1985 // Outside of the RPC boundaries, this field is nil and must neither be 1986 // checked nor populated (it is reset by the DistSender, which extracts this 1987 // error and returns it separately). In effect, nearly no usage of 1988 // BatchResponse needs to care about this field. 1989 Error error = 1; 1990 // timestamp denotes the timestamp at which the batch's reads executed. The 1991 // timestamp cache is updated at this timestamp. 1992 util.hlc.Timestamp Timestamp = 2 [(gogoproto.nullable) = false]; 1993 // txn is non-nil if the request specified a non-nil 1994 // transaction. The transaction timestamp and/or priority may have 1995 // been updated, depending on the outcome of the request. 1996 Transaction txn = 3; 1997 // now is the highest current time from any node contacted during the request. 1998 // It can be used by the receiver to update its local HLC. 1999 util.hlc.Timestamp now = 5 [(gogoproto.nullable) = false]; 2000 // collected_spans stores trace spans recorded during the execution of this 2001 // request. 2002 repeated util.tracing.RecordedSpan collected_spans = 6 [(gogoproto.nullable) = false]; 2003 // NB: if you add a field here, don't forget to update combine(). 2004 } 2005 Header header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 2006 repeated ResponseUnion responses = 2 [(gogoproto.nullable) = false]; 2007 } 2008 2009 // RangeFeedRequest is a request that expresses the intention to establish a 2010 // RangeFeed stream over the provided span, starting at the specified timestamp. 2011 message RangeFeedRequest { 2012 Header header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; 2013 Span span = 2 [(gogoproto.nullable) = false]; 2014 // with_diff specifies whether RangeFeedValue updates should contain the 2015 // previous value that was overwritten. 2016 bool with_diff = 3; 2017 } 2018 2019 // RangeFeedValue is a variant of RangeFeedEvent that represents an update to 2020 // the specified key with the provided value. 2021 message RangeFeedValue { 2022 bytes key = 1 [(gogoproto.casttype) = "Key"]; 2023 Value value = 2 [(gogoproto.nullable) = false]; 2024 // prev_value is only populated if both: 2025 // 1. with_diff was passed in the corresponding RangeFeedRequest. 2026 // 2. the key-value was present and not a deletion tombstone before 2027 // this event. 2028 Value prev_value = 3 [(gogoproto.nullable) = false]; 2029 } 2030 2031 // RangeFeedCheckpoint is a variant of RangeFeedEvent that represents the 2032 // promise that no more RangeFeedValue events with keys in the specified span 2033 // with timestamps less than or equal to the specified resolved timestamp will 2034 // be emitted on the RangeFeed response stream. 2035 // 2036 // Note that these resolved timestamps may be lower than the timestamp used in 2037 // the RangeFeedRequest used to start the RangeFeed. 2038 message RangeFeedCheckpoint { 2039 Span span = 1 [(gogoproto.nullable) = false]; 2040 util.hlc.Timestamp resolved_ts = 2 [ 2041 (gogoproto.nullable) = false, (gogoproto.customname) = "ResolvedTS"]; 2042 } 2043 2044 // RangeFeedError is a variant of RangeFeedEvent that indicates that an error 2045 // occurred during the processing of the RangeFeed. If emitted, a RangeFeedError 2046 // event will always be the final event on a RangeFeed response stream before 2047 // it is torn down. 2048 message RangeFeedError { 2049 Error error = 1 [(gogoproto.nullable) = false]; 2050 } 2051 2052 // RangeFeedEvent is a union of all event types that may be returned on a 2053 // RangeFeed response stream. 2054 message RangeFeedEvent { 2055 option (gogoproto.onlyone) = true; 2056 2057 RangeFeedValue val = 1; 2058 RangeFeedCheckpoint checkpoint = 2; 2059 RangeFeedError error = 3; 2060 } 2061 2062 // Batch and RangeFeed service implemeted by nodes for KV API requests. 2063 service Internal { 2064 rpc Batch (BatchRequest) returns (BatchResponse) {} 2065 rpc RangeFeed (RangeFeedRequest) returns (stream RangeFeedEvent) {} 2066 }