// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/raft.proto
//
// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

syntax = "proto2";
package cockroach.kv.kvserver;
option go_package = "kvserver";

import "roachpb/errors.proto";
import "roachpb/metadata.proto";
import "kv/kvserver/kvserverpb/state.proto";
import "etcd/raft/raftpb/raft.proto";
import "gogoproto/gogo.proto";

// RaftHeartbeat is a request that contains the barebones information for a
// raftpb.MsgHeartbeat raftpb.Message. RaftHeartbeats are coalesced and sent
// in a RaftMessageRequest, and reconstructed by the receiver into individual
// raftpb.Message protos.
message RaftHeartbeat {
  // The ID of the range this heartbeat pertains to.
  optional uint64 range_id = 1 [(gogoproto.nullable) = false,
      (gogoproto.customname) = "RangeID",
      (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID"];
  // The ID of the replica sending the heartbeat.
  optional uint32 from_replica_id = 2 [(gogoproto.nullable) = false,
      (gogoproto.customname) = "FromReplicaID",
      (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.ReplicaID"];
  // The ID of the replica the heartbeat is addressed to.
  optional uint32 to_replica_id = 3 [(gogoproto.nullable) = false,
      (gogoproto.customname) = "ToReplicaID",
      (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.ReplicaID"];
  // NOTE(review): term and commit presumably mirror the Term and Commit
  // fields of the raftpb.Message being reconstructed -- confirm against the
  // coalescing code.
  optional uint64 term = 4 [(gogoproto.nullable) = false];
  optional uint64 commit = 5 [(gogoproto.nullable) = false];
  // See RaftMessageRequest.quiesce for a description of quiesce requests.
  optional bool quiesce = 6 [(gogoproto.nullable) = false];

  // ToIsLearner was added in v19.2 to aid in the transition from preemptive
  // snapshots to learner replicas. If a Replica learns its ID from a message
  // which indicates that it is a learner and it is not currently a part of the
  // range (due to being from a preemptive snapshot) then it must delete all of
  // its data.
  //
  // TODO(ajwerner): remove in 20.2 once we ensure that preemptive snapshots can
  // no longer be present and that we're never talking to a 19.2 node.
  optional bool to_is_learner = 7 [(gogoproto.nullable) = false];
}

// RaftMessageRequest is the request used to send raft messages using our
// protobuf-based RPC codec. If a RaftMessageRequest has a non-empty number of
// heartbeats or heartbeat_resps, the contents of the message field is treated
// as a dummy message and discarded. A coalesced heartbeat request's replica
// descriptor's range ID must be zero.
message RaftMessageRequest {
  // The ID of the range this request pertains to.
  optional uint64 range_id = 1 [(gogoproto.nullable) = false,
      (gogoproto.customname) = "RangeID",
      (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID"];
  // Optionally, the start key of the sending replica. This is only populated
  // as a "hint" under certain conditions.
  optional bytes range_start_key = 8 [(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RKey"];

  // Descriptors of the sending and the receiving replica, respectively.
  optional roachpb.ReplicaDescriptor from_replica = 2 [(gogoproto.nullable) = false];
  optional roachpb.ReplicaDescriptor to_replica = 3 [(gogoproto.nullable) = false];

  // The raft message being transported. Treated as a dummy and discarded when
  // heartbeats or heartbeat_resps is non-empty (see the message comment).
  optional raftpb.Message message = 4 [(gogoproto.nullable) = false];

  // Is this a quiesce request? A quiesce request is a MsgHeartbeat
  // which is requesting the recipient to stop ticking its local
  // replica as long as the current Raft state matches the heartbeat
  // Term/Commit. If the Term/Commit match, the recipient is marked as
  // quiescent. If they don't match, the message is passed along to
  // Raft which will generate a MsgHeartbeatResp that will unquiesce
  // the sender.
  optional bool quiesce = 5 [(gogoproto.nullable) = false];

  // A coalesced heartbeat request is any RaftMessageRequest with a nonzero number of
  // heartbeats or heartbeat_resps.
  repeated RaftHeartbeat heartbeats = 6 [(gogoproto.nullable) = false];
  repeated RaftHeartbeat heartbeat_resps = 7 [(gogoproto.nullable) = false];
}

// RaftMessageRequestBatch groups multiple RaftMessageRequests into a single
// message.
message RaftMessageRequestBatch {
  repeated RaftMessageRequest requests = 1 [(gogoproto.nullable) = false];
}

// RaftMessageResponseUnion is the payload of a RaftMessageResponse. It is
// declared as a gogoproto "onlyone" union; an error is currently its only
// member.
message RaftMessageResponseUnion {
  option (gogoproto.onlyone) = true;

  optional roachpb.Error error = 1;
}

// RaftMessageResponse may be sent to the sender of a
// RaftMessageRequest. RaftMessage does not use the usual
// request/response pattern; it is primarily modeled as a one-way
// stream of requests. Normal 'responses' are usually sent as new
// requests on a separate stream in the other direction.
// RaftMessageResponse is not sent for every RaftMessageRequest, but
// may be used for certain error conditions.
message RaftMessageResponse {
  // The ID of the range this response pertains to.
  optional uint64 range_id = 1 [(gogoproto.nullable) = false,
      (gogoproto.customname) = "RangeID",
      (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID"];

  // Descriptors of the sending and the receiving replica, respectively.
  optional roachpb.ReplicaDescriptor from_replica = 2 [(gogoproto.nullable) = false];
  optional roachpb.ReplicaDescriptor to_replica = 3 [(gogoproto.nullable) = false];

  // The response payload.
  optional RaftMessageResponseUnion union = 4 [(gogoproto.nullable) = false];
}

// SnapshotRequest is the request used to send streaming snapshot requests.
message SnapshotRequest {
  enum Priority {
    UNKNOWN = 0;
    // RECOVERY is used for Raft-initiated snapshots and for
    // up-replication snapshots (i.e. when a dead node has been
    // removed and the range needs to be up-replicated).
    RECOVERY = 1;
    // REBALANCE is used for snapshots involved in rebalancing.
    REBALANCE = 2;
  }

  enum Strategy {
    // KV_BATCH snapshots stream batches of KV pairs for all keys in a
    // range from the sender to the receiver. These KV pairs are then
    // combined into a large RocksDB WriteBatch that is atomically
    // applied.
    KV_BATCH = 0;
  }

  // Type identifies the kind of snapshot being sent. Value 2 is reserved and
  // must not be reused.
  enum Type {
    RAFT = 0;
    LEARNER = 1;
    reserved 2;
  }

  message Header {
    reserved 1;

    // The replica state at the time the snapshot was generated. Note
    // that ReplicaState.Desc differs from the above range_descriptor
    // field which holds the updated descriptor after the new replica
    // has been added while ReplicaState.Desc holds the descriptor
    // before the new replica has been added.
    //
    // NOTE(review): no range_descriptor field is declared in this message
    // any more (presumably it was the now-reserved field 1) -- confirm and
    // update the paragraph above.
    optional storagepb.ReplicaState state = 5 [(gogoproto.nullable) = false];

    // The inner raft message is of type MsgSnap, and its snapshot data contains a UUID.
    optional RaftMessageRequest raft_message_request = 2 [(gogoproto.nullable) = false];

    // The estimated size of the range, to be used in reservation decisions.
    optional int64 range_size = 3 [(gogoproto.nullable) = false];

    // can_decline is set on preemptive snapshots, but not those generated
    // by raft because at that point it is better to queue up the stream
    // than to cancel it.
    optional bool can_decline = 4 [(gogoproto.nullable) = false];

    // The priority of the snapshot.
    optional Priority priority = 6 [(gogoproto.nullable) = false];

    // The strategy of the snapshot.
    optional Strategy strategy = 7 [(gogoproto.nullable) = false];

    // The type of the snapshot.
    optional Type type = 9 [(gogoproto.nullable) = false];

    // Whether the snapshot uses the unreplicated RaftTruncatedState or not.
    // This is generally always true at 2.2 and above outside of the migration
    // phase, though theoretically it could take a long time for all ranges
    // to update to the new mechanism. This bool is true iff the Raft log at
    // the snapshot's applied index is using the new key. In particular, it
    // is true if the index itself carries out the migration (in which case
    // the data in the snapshot contains neither key).
    //
    // See VersionUnreplicatedRaftTruncatedState.
    optional bool unreplicated_truncated_state = 8 [(gogoproto.nullable) = false];
  }

  // Metadata describing the snapshot. This field is nullable.
  // NOTE(review): presumably only set on the first request of a stream --
  // confirm against the sender.
  optional Header header = 1;

  // A RocksDB BatchRepr. Multiple kv_batches may be sent across multiple request messages.
  optional bytes kv_batch = 2 [(gogoproto.customname) = "KVBatch"];

  // These are really raftpb.Entry, but we model them as raw bytes to avoid
  // roundtripping through memory. They are separate from the kv_batch to
  // allow flexibility in log implementations.
  repeated bytes log_entries = 3;

  // NOTE(review): presumably marks the last request of a snapshot stream --
  // confirm against the sender/receiver.
  optional bool final = 4 [(gogoproto.nullable) = false];
}

// SnapshotResponse carries a status and an accompanying message string.
// NOTE(review): presumably sent by the snapshot recipient in reply to
// SnapshotRequest -- confirm against the RPC definition.
message SnapshotResponse {
  enum Status {
    UNKNOWN = 0;
    ACCEPTED = 1;
    APPLIED = 2;
    ERROR = 3;
    DECLINED = 4;
  }
  optional Status status = 1 [(gogoproto.nullable) = false];
  optional string message = 2 [(gogoproto.nullable) = false];
  reserved 3;
}

// ConfChangeContext is encoded in the raftpb.ConfChange.Context field.
message ConfChangeContext {
  // The ID of the command carried in payload.
  // NOTE(review): inferred from the field name -- confirm.
  optional string command_id = 1 [(gogoproto.nullable) = false,
      (gogoproto.customname) = "CommandID"];

  // Payload is the application-level command (i.e. an encoded
  // kvserverpb.RaftCommand).
  optional bytes payload = 2;
}