github.com/pokt-network/tendermint@v0.32.11-0.20230426215212-59310158d3e9/consensus/wal.go (about) 1 package consensus 2 3 import ( 4 "encoding/binary" 5 "fmt" 6 "hash/crc32" 7 "io" 8 "path/filepath" 9 "time" 10 11 "github.com/pkg/errors" 12 13 amino "github.com/tendermint/go-amino" 14 15 auto "github.com/tendermint/tendermint/libs/autofile" 16 "github.com/tendermint/tendermint/libs/log" 17 tmos "github.com/tendermint/tendermint/libs/os" 18 "github.com/tendermint/tendermint/libs/service" 19 "github.com/tendermint/tendermint/types" 20 tmtime "github.com/tendermint/tendermint/types/time" 21 ) 22 23 const ( 24 // amino overhead + time.Time + max consensus msg size 25 // 26 // q: where 24 bytes are coming from? 27 // a: cdc.MustMarshalBinaryBare(empty consensus part msg) = 14 bytes. +10 28 // bytes just in case amino will require more space in the future. 29 maxMsgSizeBytes = maxMsgSize + 24 30 31 // how often the WAL should be sync'd during period sync'ing 32 walDefaultFlushInterval = 2 * time.Second 33 ) 34 35 //-------------------------------------------------------- 36 // types and functions for savings consensus messages 37 38 // TimedWALMessage wraps WALMessage and adds Time for debugging purposes. 39 type TimedWALMessage struct { 40 Time time.Time `json:"time"` 41 Msg WALMessage `json:"msg"` 42 } 43 44 // EndHeightMessage marks the end of the given height inside WAL. 45 // @internal used by scripts/wal2json util. 46 type EndHeightMessage struct { 47 Height int64 `json:"height"` 48 } 49 50 type WALMessage interface{} 51 52 func RegisterWALMessages(cdc *amino.Codec) { 53 cdc.RegisterInterface((*WALMessage)(nil), nil) 54 cdc.RegisterConcrete(types.EventDataRoundState{}, "tendermint/wal/EventDataRoundState", nil) 55 cdc.RegisterConcrete(msgInfo{}, "tendermint/wal/MsgInfo", nil) 56 cdc.RegisterConcrete(timeoutInfo{}, "tendermint/wal/TimeoutInfo", nil) 57 cdc.RegisterConcrete(EndHeightMessage{}, "tendermint/wal/EndHeightMessage", nil) 58 } 59 60 //-------------------------------------------------------- 61 // Simple write-ahead logger 62 63 // WAL is an interface for any write-ahead logger. 64 type WAL interface { 65 Write(WALMessage) error 66 WriteSync(WALMessage) error 67 FlushAndSync() error 68 69 SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) 70 71 // service methods 72 Start() error 73 Stop() error 74 Wait() 75 } 76 77 // Write ahead logger writes msgs to disk before they are processed. 78 // Can be used for crash-recovery and deterministic replay. 79 // TODO: currently the wal is overwritten during replay catchup, give it a mode 80 // so it's either reading or appending - must read to end to start appending 81 // again. 82 type BaseWAL struct { 83 service.BaseService 84 85 group *auto.Group 86 87 enc *WALEncoder 88 89 flushTicker *time.Ticker 90 flushInterval time.Duration 91 } 92 93 var _ WAL = &BaseWAL{} 94 95 // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements 96 // WAL. It's flushed and synced to disk every 2s and once when stopped. 97 func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) { 98 err := tmos.EnsureDir(filepath.Dir(walFile), 0700) 99 if err != nil { 100 return nil, errors.Wrap(err, "failed to ensure WAL directory is in place") 101 } 102 103 group, err := auto.OpenGroup(walFile, groupOptions...) 104 if err != nil { 105 return nil, err 106 } 107 wal := &BaseWAL{ 108 group: group, 109 enc: NewWALEncoder(group), 110 flushInterval: walDefaultFlushInterval, 111 } 112 wal.BaseService = *service.NewBaseService(nil, "baseWAL", wal) 113 return wal, nil 114 } 115 116 // SetFlushInterval allows us to override the periodic flush interval for the WAL. 117 func (wal *BaseWAL) SetFlushInterval(i time.Duration) { 118 wal.flushInterval = i 119 } 120 121 func (wal *BaseWAL) Group() *auto.Group { 122 return wal.group 123 } 124 125 func (wal *BaseWAL) SetLogger(l log.Logger) { 126 wal.BaseService.Logger = l 127 wal.group.SetLogger(l) 128 } 129 130 func (wal *BaseWAL) OnStart() error { 131 size, err := wal.group.Head.Size() 132 if err != nil { 133 return err 134 } else if size == 0 { 135 wal.WriteSync(EndHeightMessage{0}) 136 } 137 err = wal.group.Start() 138 if err != nil { 139 return err 140 } 141 wal.flushTicker = time.NewTicker(wal.flushInterval) 142 go wal.processFlushTicks() 143 return nil 144 } 145 146 func (wal *BaseWAL) processFlushTicks() { 147 for { 148 select { 149 case <-wal.flushTicker.C: 150 if err := wal.FlushAndSync(); err != nil { 151 wal.Logger.Error("Periodic WAL flush failed", "err", err) 152 } 153 case <-wal.Quit(): 154 return 155 } 156 } 157 } 158 159 // FlushAndSync flushes and fsync's the underlying group's data to disk. 160 // See auto#FlushAndSync 161 func (wal *BaseWAL) FlushAndSync() error { 162 return wal.group.FlushAndSync() 163 } 164 165 // Stop the underlying autofile group. 166 // Use Wait() to ensure it's finished shutting down 167 // before cleaning up files. 168 func (wal *BaseWAL) OnStop() { 169 wal.flushTicker.Stop() 170 wal.FlushAndSync() 171 wal.group.Stop() 172 wal.group.Close() 173 } 174 175 // Wait for the underlying autofile group to finish shutting down 176 // so it's safe to cleanup files. 177 func (wal *BaseWAL) Wait() { 178 wal.group.Wait() 179 } 180 181 // Write is called in newStep and for each receive on the 182 // peerMsgQueue and the timeoutTicker. 183 // NOTE: does not call fsync() 184 func (wal *BaseWAL) Write(msg WALMessage) error { 185 if wal == nil { 186 return nil 187 } 188 189 if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil { 190 wal.Logger.Error("Error writing msg to consensus wal. WARNING: recover may not be possible for the current height", 191 "err", err, "msg", msg) 192 return err 193 } 194 195 return nil 196 } 197 198 // WriteSync is called when we receive a msg from ourselves 199 // so that we write to disk before sending signed messages. 200 // NOTE: calls fsync() 201 func (wal *BaseWAL) WriteSync(msg WALMessage) error { 202 if wal == nil { 203 return nil 204 } 205 206 if err := wal.Write(msg); err != nil { 207 return err 208 } 209 210 if err := wal.FlushAndSync(); err != nil { 211 wal.Logger.Error(`WriteSync failed to flush consensus wal. 212 WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`, 213 "err", err) 214 return err 215 } 216 217 return nil 218 } 219 220 // WALSearchOptions are optional arguments to SearchForEndHeight. 221 type WALSearchOptions struct { 222 // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. 223 IgnoreDataCorruptionErrors bool 224 } 225 226 // SearchForEndHeight searches for the EndHeightMessage with the given height 227 // and returns an auto.GroupReader, whenever it was found or not and an error. 228 // Group reader will be nil if found equals false. 229 // 230 // CONTRACT: caller must close group reader. 231 func (wal *BaseWAL) SearchForEndHeight( 232 height int64, 233 options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 234 var ( 235 msg *TimedWALMessage 236 gr *auto.GroupReader 237 ) 238 lastHeightFound := int64(-1) 239 240 // NOTE: starting from the last file in the group because we're usually 241 // searching for the last height. See replay.go 242 min, max := wal.group.MinIndex(), wal.group.MaxIndex() 243 wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) 244 for index := max; index >= min; index-- { 245 gr, err = wal.group.NewReader(index) 246 if err != nil { 247 return nil, false, err 248 } 249 250 dec := NewWALDecoder(gr) 251 for { 252 msg, err = dec.Decode() 253 if err == io.EOF { 254 // OPTIMISATION: no need to look for height in older files if we've seen h < height 255 if lastHeightFound > 0 && lastHeightFound < height { 256 gr.Close() 257 return nil, false, nil 258 } 259 // check next file 260 break 261 } 262 if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { 263 wal.Logger.Error("Corrupted entry. Skipping...", "err", err) 264 // do nothing 265 continue 266 } else if err != nil { 267 gr.Close() 268 return nil, false, err 269 } 270 271 if m, ok := msg.Msg.(EndHeightMessage); ok { 272 lastHeightFound = m.Height 273 if m.Height == height { // found 274 wal.Logger.Info("Found", "height", height, "index", index) 275 return gr, true, nil 276 } 277 } 278 } 279 gr.Close() 280 } 281 282 return nil, false, nil 283 } 284 285 /////////////////////////////////////////////////////////////////////////////// 286 287 // A WALEncoder writes custom-encoded WAL messages to an output stream. 288 // 289 // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value (go-amino encoded) 290 type WALEncoder struct { 291 wr io.Writer 292 } 293 294 // NewWALEncoder returns a new encoder that writes to wr. 295 func NewWALEncoder(wr io.Writer) *WALEncoder { 296 return &WALEncoder{wr} 297 } 298 299 // Encode writes the custom encoding of v to the stream. It returns an error if 300 // the amino-encoded size of v is greater than 1MB. Any error encountered 301 // during the write is also returned. 302 func (enc *WALEncoder) Encode(v *TimedWALMessage) error { 303 data := cdc.MustMarshalBinaryBare(v) 304 305 crc := crc32.Checksum(data, crc32c) 306 length := uint32(len(data)) 307 if length > maxMsgSizeBytes { 308 return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) 309 } 310 totalLength := 8 + int(length) 311 312 msg := make([]byte, totalLength) 313 binary.BigEndian.PutUint32(msg[0:4], crc) 314 binary.BigEndian.PutUint32(msg[4:8], length) 315 copy(msg[8:], data) 316 317 _, err := enc.wr.Write(msg) 318 return err 319 } 320 321 /////////////////////////////////////////////////////////////////////////////// 322 323 // IsDataCorruptionError returns true if data has been corrupted inside WAL. 324 func IsDataCorruptionError(err error) bool { 325 _, ok := err.(DataCorruptionError) 326 return ok 327 } 328 329 // DataCorruptionError is an error that occures if data on disk was corrupted. 330 type DataCorruptionError struct { 331 cause error 332 } 333 334 func (e DataCorruptionError) Error() string { 335 return fmt.Sprintf("DataCorruptionError[%v]", e.cause) 336 } 337 338 func (e DataCorruptionError) Cause() error { 339 return e.cause 340 } 341 342 // A WALDecoder reads and decodes custom-encoded WAL messages from an input 343 // stream. See WALEncoder for the format used. 344 // 345 // It will also compare the checksums and make sure data size is equal to the 346 // length from the header. If that is not the case, error will be returned. 347 type WALDecoder struct { 348 rd io.Reader 349 } 350 351 // NewWALDecoder returns a new decoder that reads from rd. 352 func NewWALDecoder(rd io.Reader) *WALDecoder { 353 return &WALDecoder{rd} 354 } 355 356 // Decode reads the next custom-encoded value from its reader and returns it. 357 func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { 358 b := make([]byte, 4) 359 360 _, err := dec.rd.Read(b) 361 if err == io.EOF { 362 return nil, err 363 } 364 if err != nil { 365 return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} 366 } 367 crc := binary.BigEndian.Uint32(b) 368 369 b = make([]byte, 4) 370 _, err = dec.rd.Read(b) 371 if err != nil { 372 return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} 373 } 374 length := binary.BigEndian.Uint32(b) 375 376 if length > maxMsgSizeBytes { 377 return nil, DataCorruptionError{fmt.Errorf( 378 "length %d exceeded maximum possible value of %d bytes", 379 length, 380 maxMsgSizeBytes)} 381 } 382 383 data := make([]byte, length) 384 n, err := dec.rd.Read(data) 385 if err != nil { 386 return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} 387 } 388 389 // check checksum before decoding data 390 actualCRC := crc32.Checksum(data, crc32c) 391 if actualCRC != crc { 392 return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} 393 } 394 395 var res = new(TimedWALMessage) // nolint: gosimple 396 err = cdc.UnmarshalBinaryBare(data, res) 397 if err != nil { 398 return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} 399 } 400 401 return res, err 402 } 403 404 type nilWAL struct{} 405 406 var _ WAL = nilWAL{} 407 408 func (nilWAL) Write(m WALMessage) error { return nil } 409 func (nilWAL) WriteSync(m WALMessage) error { return nil } 410 func (nilWAL) FlushAndSync() error { return nil } 411 func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 412 return nil, false, nil 413 } 414 func (nilWAL) Start() error { return nil } 415 func (nilWAL) Stop() error { return nil } 416 func (nilWAL) Wait() {}