github.com/aakash4dev/cometbft@v0.38.2/spec/consensus/consensus-paper/consensus.tex (about) 1 2 \section{Tendermint consensus algorithm} \label{sec:tendermint} 3 4 \newcommand\Disseminate{\textbf{Disseminate}} 5 6 \newcommand\Proposal{\mathsf{PROPOSAL}} 7 \newcommand\ProposalPart{\mathsf{PROPOSAL\mbox{-}PART}} 8 \newcommand\PrePrepare{\mathsf{INIT}} \newcommand\Prevote{\mathsf{PREVOTE}} 9 \newcommand\Precommit{\mathsf{PRECOMMIT}} 10 \newcommand\Decision{\mathsf{DECISION}} 11 12 \newcommand\ViewChange{\mathsf{VC}} 13 \newcommand\ViewChangeAck{\mathsf{VC\mbox{-}ACK}} 14 \newcommand\NewPrePrepare{\mathsf{VC\mbox{-}INIT}} 15 \newcommand\coord{\mathsf{proposer}} 16 17 \newcommand\newHeight{newHeight} \newcommand\newRound{newRound} 18 \newcommand\nil{nil} \newcommand\id{id} \newcommand{\propose}{propose} 19 \newcommand\prevote{prevote} \newcommand\prevoteWait{prevoteWait} 20 \newcommand\precommit{precommit} \newcommand\precommitWait{precommitWait} 21 \newcommand\commit{commit} 22 23 \newcommand\timeoutPropose{timeoutPropose} 24 \newcommand\timeoutPrevote{timeoutPrevote} 25 \newcommand\timeoutPrecommit{timeoutPrecommit} 26 \newcommand\proofOfLocking{proof\mbox{-}of\mbox{-}locking} 27 28 \begin{algorithm}[htb!] 
\def\baselinestretch{1} \scriptsize\raggedright 29 \begin{algorithmic}[1] 30 \SHORTSPACE 31 \INIT{} 32 \STATE $h_p := 0$ 33 \COMMENT{current height, or consensus instance we are currently executing} 34 \STATE $round_p := 0$ \COMMENT{current round number} 35 \STATE $step_p \in \set{\propose, \prevote, \precommit}$ 36 \STATE $decision_p[] := nil$ 37 \STATE $lockedValue_p := nil$ 38 \STATE $lockedRound_p := -1$ 39 \STATE $validValue_p := nil$ 40 \STATE $validRound_p := -1$ 41 \ENDINIT 42 \SHORTSPACE 43 \STATE \textbf{upon} start \textbf{do} $StartRound(0)$ 44 \SHORTSPACE 45 \FUNCTION{$StartRound(round)$} \label{line:tab:startRound} 46 \STATE $round_p \assign round$ 47 \STATE $step_p \assign \propose$ 48 \IF{$\coord(h_p, round_p) = p$} 49 \IF{$validValue_p \neq \nil$} \label{line:tab:isThereLockedValue} 50 \STATE $proposal \assign validValue_p$ \ELSE \STATE $proposal \assign 51 getValue()$ 52 \label{line:tab:getValidValue} 53 \ENDIF 54 \STATE \Broadcast\ $\li{\Proposal,h_p, round_p, proposal, validRound_p}$ 55 \label{line:tab:send-proposal} 56 \ELSE 57 \STATE \textbf{schedule} $OnTimeoutPropose(h_p, 58 round_p)$ to be executed \textbf{after} $\timeoutPropose(round_p)$ 59 \ENDIF 60 \ENDFUNCTION 61 62 \SPACE 63 \UPON{$\li{\Proposal,h_p,round_p, v, -1}$ \From\ $\coord(h_p,round_p)$ 64 \With\ $step_p = \propose$} \label{line:tab:recvProposal} 65 \IF{$valid(v) \wedge (lockedRound_p = -1 \vee lockedValue_p = v)$} 66 \label{line:tab:accept-proposal-2} 67 \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$ 68 \label{line:tab:prevote-proposal} 69 \ELSE 70 \label{line:tab:acceptProposal1} 71 \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$ 72 \label{line:tab:prevote-nil} 73 \ENDIF 74 \STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote1} 75 \ENDUPON 76 77 \SPACE 78 \UPON{$\li{\Proposal,h_p,round_p, v, vr}$ \From\ $\coord(h_p,round_p)$ 79 \textbf{AND} $2f+1$ $\li{\Prevote,h_p, vr,id(v)}$ \With\ $step_p = \propose \wedge (vr \ge 0 \wedge vr < round_p)$} 80 
\label{line:tab:acceptProposal} 81 \IF{$valid(v) \wedge (lockedRound_p \le vr 82 \vee lockedValue_p = v)$} \label{line:tab:cond-prevote-higher-proposal} 83 \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$ 84 \label{line:tab:prevote-higher-proposal} 85 \ELSE 86 \label{line:tab:acceptProposal2} 87 \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$ 88 \label{line:tab:prevote-nil2} 89 \ENDIF 90 \STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote3} 91 \ENDUPON 92 93 \SPACE 94 \UPON{$2f+1$ $\li{\Prevote,h_p, round_p,*}$ \With\ $step_p = \prevote$ for the first time} 95 \label{line:tab:recvAny2/3Prevote} 96 \STATE \textbf{schedule} $OnTimeoutPrevote(h_p, round_p)$ to be executed \textbf{after} $\timeoutPrevote(round_p)$ \label{line:tab:timeoutPrevote} 97 \ENDUPON 98 99 \SPACE 100 \UPON{$\li{\Proposal,h_p,round_p, v, *}$ \From\ $\coord(h_p,round_p)$ 101 \textbf{AND} $2f+1$ $\li{\Prevote,h_p, round_p,id(v)}$ \With\ $valid(v) \wedge step_p \ge \prevote$ for the first time} 102 \label{line:tab:recvPrevote} 103 \IF{$step_p = \prevote$} 104 \STATE $lockedValue_p \assign v$ \label{line:tab:setLockedValue} 105 \STATE $lockedRound_p \assign round_p$ \label{line:tab:setLockedRound} 106 \STATE \Broadcast \ $\li{\Precommit,h_p,round_p,id(v)}$ 107 \label{line:tab:precommit-v} 108 \STATE $step_p \assign \precommit$ \label{line:tab:setStateToCommit} 109 \ENDIF 110 \STATE $validValue_p \assign v$ \label{line:tab:setValidRound} 111 \STATE $validRound_p \assign round_p$ \label{line:tab:setValidValue} 112 \ENDUPON 113 114 \SHORTSPACE 115 \UPON{$2f+1$ $\li{\Prevote,h_p,round_p, \nil}$ 116 \With\ $step_p = \prevote$} 117 \STATE \Broadcast \ $\li{\Precommit,h_p,round_p, \nil}$ 118 \label{line:tab:precommit-v-1} 119 \STATE $step_p \assign \precommit$ 120 \ENDUPON 121 122 \SPACE 123 \UPON{$2f+1$ $\li{\Precommit,h_p,round_p,*}$ for the first time} 124 \label{line:tab:startTimeoutPrecommit} 125 \STATE \textbf{schedule} $OnTimeoutPrecommit(h_p, round_p)$ to be executed 
\textbf{after} $\timeoutPrecommit(round_p)$ 126 127 \ENDUPON 128 129 \SPACE 130 \UPON{$\li{\Proposal,h_p,r, v, *}$ \From\ $\coord(h_p,r)$ \textbf{AND} 131 $2f+1$ $\li{\Precommit,h_p,r,id(v)}$ \With\ $decision_p[h_p] = \nil$} 132 \label{line:tab:onDecideRule} 133 \IF{$valid(v)$} \label{line:tab:validDecisionValue} 134 \STATE $decision_p[h_p] \assign v$ \label{line:tab:decide} 135 \STATE $h_p \assign h_p + 1$ \label{line:tab:increaseHeight} 136 \STATE reset $lockedRound_p$, $lockedValue_p$, $validRound_p$ and $validValue_p$ to initial values 137 and empty the message log 138 \STATE $StartRound(0)$ 139 \ENDIF 140 \ENDUPON 141 142 \SHORTSPACE 143 \UPON{$f+1$ $\li{*,h_p,round, *, *}$ \textbf{with} $round > round_p$} 144 \label{line:tab:skipRounds} 145 \STATE $StartRound(round)$ \label{line:tab:nextRound2} 146 \ENDUPON 147 148 \SHORTSPACE 149 \FUNCTION{$OnTimeoutPropose(height,round)$} \label{line:tab:onTimeoutPropose} 150 \IF{$height = h_p \wedge round = round_p \wedge step_p = \propose$} 151 \STATE \Broadcast \ $\li{\Prevote,h_p,round_p, \nil}$ 152 \label{line:tab:prevote-nil-on-timeout} 153 \STATE $step_p \assign \prevote$ 154 \ENDIF 155 \ENDFUNCTION 156 157 \SHORTSPACE 158 \FUNCTION{$OnTimeoutPrevote(height,round)$} \label{line:tab:onTimeoutPrevote} 159 \IF{$height = h_p \wedge round = round_p \wedge step_p = \prevote$} 160 \STATE \Broadcast \ $\li{\Precommit,h_p,round_p,\nil}$ 161 \label{line:tab:precommit-nil-onTimeout} 162 \STATE $step_p \assign \precommit$ 163 \ENDIF 164 \ENDFUNCTION 165 166 \SHORTSPACE 167 \FUNCTION{$OnTimeoutPrecommit(height,round)$} \label{line:tab:onTimeoutPrecommit} 168 \IF{$height = h_p \wedge round = round_p$} 169 \STATE $StartRound(round_p + 1)$ \label{line:tab:nextRound} 170 \ENDIF 171 \ENDFUNCTION 172 \end{algorithmic} \caption{Tendermint consensus algorithm} 173 \label{alg:tendermint} 174 \end{algorithm} 175 176 In this section we present the Tendermint Byzantine fault-tolerant consensus 177 algorithm. 
The algorithm is specified by the pseudo-code shown in 178 Algorithm~\ref{alg:tendermint}. We present the algorithm as a set of \emph{upon 179 rules} that are executed atomically\footnote{In case several rules are active 180 at the same time, the first rule to be executed is picked randomly. The 181 correctness of the algorithm does not depend on the order in which rules are 182 executed.}. We assume that processes exchange protocol messages using a gossip 183 protocol and that both sent and received messages are stored in a local message 184 log for every process. An upon rule is triggered once the message log contains 185 messages such that the corresponding condition evaluates to $\mathtt{true}$. The 186 condition that assumes reception of $X$ messages of a particular type and 187 content denotes reception of messages whose senders have aggregate voting power at 188 least equal to $X$. For example, the condition $2f+1$ $\li{\Precommit,h_p,r,id(v)}$ 189 evaluates to true upon reception of $\Precommit$ messages for height $h_p$, 190 a round $r$ and with value equal to $id(v)$ whose senders have aggregate voting 191 power at least equal to $2f+1$. Some of the rules end with a ``for the first time'' constraint 192 to denote that they are triggered only the first time the corresponding condition evaluates 193 to $\mathtt{true}$. This is because those rules do not always change the state of algorithm 194 variables, so without this constraint the algorithm could keep 195 executing those rules forever. The variables with index $p$ are process local state 196 variables, while variables without index $p$ are value placeholders. The sign 197 $*$ denotes any value. 198 199 We denote by $n$ the total voting power of processes in the system, and we 200 assume that the total voting power of faulty processes in the system is bounded 201 by a system parameter $f$. 
The algorithm assumes that $n > 3f$, i.e., it 202 requires that the total voting power of faulty processes is smaller than one 203 third of the total voting power. For simplicity we present the algorithm for 204 the case $n = 3f + 1$. 205 206 The algorithm proceeds in rounds, where each round has a dedicated 207 \emph{proposer}. The mapping of rounds to proposers is known to all processes 208 and is given as a function $\coord(h, round)$, returning the proposer for 209 the round $round$ in the consensus instance $h$. We 210 assume that the proposer selection function is weighted round-robin, where 211 processes are rotated proportionally to their voting power\footnote{A validator 212 with more voting power is selected more frequently, proportional to its power. 213 More precisely, during a sequence of rounds of size $n$, every process is 214 proposer in a number of rounds equal to its voting power.}. 215 The internal protocol state transitions are triggered by message reception and 216 by expiration of timeouts. There are three timeouts in Algorithm~\ref{alg:tendermint}: 217 $\timeoutPropose$, $\timeoutPrevote$ and $\timeoutPrecommit$. 218 The timeouts prevent the algorithm from blocking and 219 waiting forever for some condition to be true, ensure that processes continuously 220 transition between rounds, and guarantee that eventually (after GST) communication 221 between correct processes is timely and reliable so they can decide. 222 The last role is achieved by increasing the timeouts with every new round $r$, 223 i.e., $timeoutX(r) = initTimeoutX + r \cdot timeoutDelta$; 224 they are reset for every new height (consensus 225 instance). 226 227 Processes exchange the following messages in Tendermint: $\Proposal$, 228 $\Prevote$ and $\Precommit$. The $\Proposal$ message is used by the proposer of 229 the current round to suggest a potential decision value, while $\Prevote$ and 230 $\Precommit$ are votes for a proposed value. According to the classification of 
According to the classification of 231 consensus algorithms from \cite{RMS10:dsn}, Tendermint, like PBFT 232 \cite{CL02:tcs} and DLS \cite{DLS88:jacm}, belongs to class 3, so it requires 233 two voting steps (three communication exchanges in total) to decide a value. 234 The Tendermint consensus algorithm is designed for the blockchain context where 235 the value to decide is a block of transactions (ie. it is potentially quite 236 large, consisting of many transactions). Therefore, in the Algorithm 237 \ref{alg:tendermint} (similar as in \cite{CL02:tcs}) we are explicit about 238 sending a value (block of transactions) and a small, constant size value id (a 239 unique value identifier, normally a hash of the value, i.e., if $\id(v) = 240 \id(v')$, then $v=v'$). The $\Proposal$ message is the only one carrying the 241 value; $\Prevote$ and $\Precommit$ messages carry the value id. A correct 242 process decides on a value $v$ in Tendermint upon receiving the $\Proposal$ for 243 $v$ and $2f+1$ voting-power equivalent $\Precommit$ messages for $\id(v)$ in 244 some round $r$. In order to send $\Precommit$ message for $v$ in a round $r$, a 245 correct process waits to receive the $\Proposal$ and $2f+1$ of the 246 corresponding $\Prevote$ messages in the round $r$. Otherwise, 247 it sends $\Precommit$ message with a special $\nil$ value. 248 This ensures that correct processes can $\Precommit$ only a 249 single value (or $\nil$) in a round. As 250 proposers may be faulty, the proposed value is treated by correct processes as 251 a suggestion (it is not blindly accepted), and a correct process tells others 252 if it accepted the $\Proposal$ for value $v$ by sending $\Prevote$ message for 253 $\id(v)$; otherwise it sends $\Prevote$ message with the special $\nil$ value. 254 255 Every process maintains the following variables in the Algorithm 256 \ref{alg:tendermint}: $step$, $lockedValue$, $lockedRound$, $validValue$ and 257 $validRound$. 
The $step$ denotes the current state of the internal Tendermint 258 state machine, i.e., it reflects the stage of the algorithm execution in the 259 current round. The $lockedValue$ stores the most recent value (with respect to 260 a round number) for which a $\Precommit$ message has been sent. The 261 $lockedRound$ is the last round in which the process sent a $\Precommit$ 262 message that is not $\nil$. We also say that a correct process locks a value 263 $v$ in a round $r$ by setting $lockedValue = v$ and $lockedRound = r$ before 264 sending a $\Precommit$ message for $\id(v)$. As a correct process can decide a 265 value $v$ only if $2f+1$ $\Precommit$ messages for $\id(v)$ are received, this 266 implies that a possible decision value is a value that is locked by at least 267 $f+1$ voting power equivalent of correct processes. Therefore, any value $v$ 268 for which $\Proposal$ and $2f+1$ of the corresponding $\Prevote$ messages are 269 received in some round $r$ is a \emph{possible decision} value. The role of the 270 $validValue$ variable is to store the most recent possible decision value; the 271 $validRound$ is the last round in which $validValue$ is updated. Apart from 272 those variables, a process also stores the current consensus instance ($h_p$, 273 called \emph{height} in Tendermint), and the current round number ($round_p$) 274 and attaches them to every message. Finally, a process also stores an array of 275 decisions, $decision_p$ (Tendermint assumes a sequence of consensus instances, 276 one for each height). 277 278 Every round starts with a proposer suggesting a value with the $\Proposal$ 279 message (see line~\ref{line:tab:send-proposal}). In the initial round of each 280 height, the proposer is free to choose the value to suggest. In 281 Algorithm~\ref{alg:tendermint}, a correct process obtains a value to propose 282 using an external function $getValue()$ that returns a valid value to 283 propose. 
In the following rounds, a correct proposer will suggest a new value 284 only if $validValue = \nil$; otherwise $validValue$ is proposed (see 285 lines~\ref{line:tab:isThereLockedValue}--\ref{line:tab:getValidValue}). 286 In addition to the value proposed, the $\Proposal$ message also 287 contains the $validRound$ so other processes are informed about the last round 288 in which the proposer observed $validValue$ as a possible decision value. 289 Note that if a correct proposer $p$ sends $validValue$ with the $validRound$ in the 290 $\Proposal$, this implies that the process $p$ received $\Proposal$ and the 291 corresponding $2f+1$ $\Prevote$ messages for $validValue$ in the round 292 $validRound$. 293 If a correct process sends a $\Proposal$ message with $validValue$ ($validRound > -1$) 294 at time $t > GST$, by the \emph{Gossip communication} property, the 295 corresponding $\Proposal$ and the $\Prevote$ messages will be received by all 296 correct processes before time $t+\Delta$. Therefore, all correct processes will 297 be able to verify the correctness of the suggested value as it is supported by 298 the $\Proposal$ and the corresponding $2f+1$ voting power equivalent $\Prevote$ 299 messages. 300 301 A correct process $p$ accepts the proposal for a value $v$ (sends $\Prevote$ 302 for $id(v)$) if an external \emph{valid} function returns $true$ for the value 303 $v$, and if $p$ has not locked any value ($lockedRound = -1$) or $p$ has locked 304 the value $v$ ($lockedValue = v$); see line 305 \ref{line:tab:accept-proposal-2}. 
In case the proposed pair is $(v,vr \ge 0)$ and a 306 correct process $p$ has locked some value, it will accept 307 $v$ if it is a more recent possible decision value\footnote{As 308 explained above, the possible decision value in a round $r$ is the one for 309 which $\Proposal$ and the corresponding $2f+1$ $\Prevote$ messages are received 310 for the round $r$.}, $vr > lockedRound_p$, or if $lockedValue = v$ 311 (see line~\ref{line:tab:cond-prevote-higher-proposal}). Otherwise, a correct 312 process will reject the proposal by sending a $\Prevote$ message with $\nil$ 313 value. A correct process will send a $\Prevote$ message with $\nil$ value also in 314 case $\timeoutPropose$ expired (it is triggered when a correct process starts a 315 new round) and a process has not sent a $\Prevote$ message in the current round 316 yet (see line~\ref{line:tab:onTimeoutPropose}). 317 318 If a correct process receives a $\Proposal$ message for some value $v$ and $2f+1$ 319 $\Prevote$ messages for $\id(v)$, then it sends a $\Precommit$ message with 320 $\id(v)$. Otherwise, it sends $\Precommit$ $\nil$. A correct process will send a 321 $\Precommit$ message with $\nil$ value also in case $\timeoutPrevote$ expired 322 (it is started when a correct process sent a $\Prevote$ message and received any 323 $2f+1$ $\Prevote$ messages) and a process has not sent a $\Precommit$ message in 324 the current round yet (see line~\ref{line:tab:onTimeoutPrevote}). A 325 correct process decides on some value $v$ if it receives in some round $r$ a 326 $\Proposal$ message for $v$ and $2f+1$ $\Precommit$ messages with $\id(v)$ (see 327 line~\ref{line:tab:decide}). To prevent the algorithm from blocking and 328 waiting forever for this condition to be true, Algorithm 329 \ref{alg:tendermint} relies on $\timeoutPrecommit$. It is triggered after a 330 process receives any set of $2f+1$ $\Precommit$ messages for the current round. 
331 If the $\timeoutPrecommit$ expires and a process has not decided yet, the 332 process starts the next round (see line~\ref{line:tab:onTimeoutPrecommit}). 333 When a correct process $p$ decides, it starts the next consensus instance 334 (for the next height). The \emph{Gossip communication} property ensures 335 that the $\Proposal$ and $2f+1$ $\Precommit$ messages that led $p$ to decide 336 are eventually received by all correct processes, so they will also decide. 337 338 \subsection{Termination mechanism} 339 340 Tendermint ensures termination by a novel mechanism that benefits from the 341 gossip-based nature of communication (see \emph{Gossip communication} 342 property). It requires managing two additional variables, $validValue$ and 343 $validRound$, that are then used by the proposer during the propose step as 344 explained above. The $validValue$ and $validRound$ are updated to $v$ and $r$ 345 by a correct process in a round $r$ when the process receives a valid $\Proposal$ 346 message for the value $v$ and the corresponding $2f+1$ $\Prevote$ messages for 347 $id(v)$ in the round $r$ (see the rule at line~\ref{line:tab:recvPrevote}). 348 349 We now briefly give the intuition for how managing and proposing $validValue$ 350 and $validRound$ ensures termination. A formal treatment is left for 351 Section~\ref{sec:proof}. 352 353 The first thing to note is that during a good period, because of the 354 \emph{Gossip communication} property, if a correct process $p$ locks a value 355 $v$ in some round $r$, all correct processes will update $validValue$ to $v$ 356 and $validRound$ to $r$ before the end of the round $r$ (we prove this formally 357 in Section~\ref{sec:proof}). The intuition is that messages that led to $p$ 358 locking a value $v$ in the round $r$ will be gossiped to all correct processes 359 before the end of the round $r$, so they will update $validValue$ and 360 $validRound$ (see line~\ref{line:tab:recvPrevote}). 
Therefore, if a correct 361 process locks some value during a good period, $validValue$ and $validRound$ are 362 updated by all correct processes so that the value proposed in the following 363 rounds will be acceptable to all correct processes. Note 364 that it could happen that during a good period, no correct process locks a value, 365 but some correct process $q$ updates $validValue$ and $validRound$ during some 366 round. As no correct process locks a value in this case, $validValue_q$ and 367 $validRound_q$ will also be acceptable to all correct processes as 368 $validRound_q > lockedRound_c$ for every correct process $c$ and as the 369 \emph{Gossip communication} property ensures that the corresponding $\Prevote$ 370 messages that $q$ received in the round $validRound_q$ are received by all 371 correct processes $\Delta$ time later. 372 373 Finally, it could happen that after GST, there is a long sequence of rounds in which 374 no correct process either locks a value or updates $validValue$ and $validRound$. 375 In this case, during this sequence of rounds, the proposed value suggested by correct 376 processes was not accepted by all correct processes. Note that this sequence of rounds 377 is always finite as at the beginning of every 378 round there is at least a single correct process $c$ such that $validValue_c$ 379 and $validRound_c$ are acceptable to every correct process. This is true as 380 there exists a correct process $c$ such that for every other correct process 381 $p$, $validRound_c > lockedRound_p$ or $validValue_c = lockedValue_p$. This is 382 true as $c$ is the process that has locked a value in the most recent round 383 among all correct processes (or no correct process locked any value). Therefore, 384 eventually $c$ will be the proposer in some round and the proposed value will be accepted 385 by all correct processes, thereby terminating this sequence of 386 rounds. 
387 388 Therefore, updating the $validValue$ and $validRound$ variables, and the 389 \emph{Gossip communication} property, together ensure that eventually, during 390 the good period, there exists a round with a correct proposer whose proposed 391 value will be accepted by all correct processes, and all correct processes will 392 terminate in that round. Note that this mechanism, contrary to the common 393 termination mechanism illustrated in 394 Figure~\ref{ch3:fig:coordinator-change}, does not require exchanging any 395 information in addition to messages already sent as part of what is 396 normally called the ``normal'' case. 397