github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/internal/consensus/wal.go (about) 1 package consensus 2 3 import ( 4 "context" 5 "encoding/binary" 6 "errors" 7 "fmt" 8 "hash/crc32" 9 "io" 10 "path/filepath" 11 "time" 12 13 "github.com/gogo/protobuf/proto" 14 15 "github.com/ari-anchor/sei-tendermint/internal/jsontypes" 16 auto "github.com/ari-anchor/sei-tendermint/internal/libs/autofile" 17 "github.com/ari-anchor/sei-tendermint/libs/log" 18 tmos "github.com/ari-anchor/sei-tendermint/libs/os" 19 "github.com/ari-anchor/sei-tendermint/libs/service" 20 tmtime "github.com/ari-anchor/sei-tendermint/libs/time" 21 tmcons "github.com/ari-anchor/sei-tendermint/proto/tendermint/consensus" 22 ) 23 24 const ( 25 // time.Time + max consensus msg size 26 maxMsgSizeBytes = maxMsgSize + 24 27 28 // how often the WAL should be sync'd during period sync'ing 29 walDefaultFlushInterval = 2 * time.Second 30 ) 31 32 //-------------------------------------------------------- 33 // types and functions for savings consensus messages 34 35 // TimedWALMessage wraps WALMessage and adds Time for debugging purposes. 36 type TimedWALMessage struct { 37 Time time.Time `json:"time"` 38 Msg WALMessage `json:"msg"` 39 } 40 41 // EndHeightMessage marks the end of the given height inside WAL. 42 // @internal used by scripts/wal2json util. 43 type EndHeightMessage struct { 44 Height int64 `json:"height,string"` 45 } 46 47 func (EndHeightMessage) TypeTag() string { return "tendermint/wal/EndHeightMessage" } 48 49 type WALMessage interface{} 50 51 func init() { 52 jsontypes.MustRegister(msgInfo{}) 53 jsontypes.MustRegister(timeoutInfo{}) 54 jsontypes.MustRegister(EndHeightMessage{}) 55 } 56 57 //-------------------------------------------------------- 58 // Simple write-ahead logger 59 60 // WAL is an interface for any write-ahead logger. 61 type WAL interface { 62 Write(WALMessage) error 63 WriteSync(WALMessage) error 64 FlushAndSync() error 65 66 SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) 67 68 // service methods 69 Start(context.Context) error 70 Stop() 71 Wait() 72 } 73 74 // Write ahead logger writes msgs to disk before they are processed. 75 // Can be used for crash-recovery and deterministic replay. 76 // TODO: currently the wal is overwritten during replay catchup, give it a mode 77 // so it's either reading or appending - must read to end to start appending 78 // again. 79 type BaseWAL struct { 80 service.BaseService 81 logger log.Logger 82 83 group *auto.Group 84 85 enc *WALEncoder 86 87 flushTicker *time.Ticker 88 flushInterval time.Duration 89 } 90 91 var _ WAL = &BaseWAL{} 92 93 // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements 94 // WAL. It's flushed and synced to disk every 2s and once when stopped. 95 func NewWAL(ctx context.Context, logger log.Logger, walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) { 96 err := tmos.EnsureDir(filepath.Dir(walFile), 0700) 97 if err != nil { 98 return nil, fmt.Errorf("failed to ensure WAL directory is in place: %w", err) 99 } 100 101 group, err := auto.OpenGroup(ctx, logger, walFile, groupOptions...) 102 if err != nil { 103 return nil, err 104 } 105 wal := &BaseWAL{ 106 logger: logger, 107 group: group, 108 enc: NewWALEncoder(group), 109 flushInterval: walDefaultFlushInterval, 110 } 111 wal.BaseService = *service.NewBaseService(logger, "baseWAL", wal) 112 return wal, nil 113 } 114 115 // SetFlushInterval allows us to override the periodic flush interval for the WAL. 116 func (wal *BaseWAL) SetFlushInterval(i time.Duration) { 117 wal.flushInterval = i 118 } 119 120 func (wal *BaseWAL) Group() *auto.Group { 121 return wal.group 122 } 123 124 func (wal *BaseWAL) OnStart(ctx context.Context) error { 125 size, err := wal.group.Head.Size() 126 if err != nil { 127 return err 128 } else if size == 0 { 129 if err := wal.WriteSync(EndHeightMessage{0}); err != nil { 130 return err 131 } 132 } 133 err = wal.group.Start(ctx) 134 if err != nil { 135 return err 136 } 137 wal.flushTicker = time.NewTicker(wal.flushInterval) 138 go wal.processFlushTicks(ctx) 139 return nil 140 } 141 142 func (wal *BaseWAL) processFlushTicks(ctx context.Context) { 143 for { 144 select { 145 case <-wal.flushTicker.C: 146 if err := wal.FlushAndSync(); err != nil { 147 wal.logger.Error("Periodic WAL flush failed", "err", err) 148 } 149 case <-ctx.Done(): 150 return 151 } 152 } 153 } 154 155 // FlushAndSync flushes and fsync's the underlying group's data to disk. 156 // See auto#FlushAndSync 157 func (wal *BaseWAL) FlushAndSync() error { 158 return wal.group.FlushAndSync() 159 } 160 161 // Stop the underlying autofile group. 162 // Use Wait() to ensure it's finished shutting down 163 // before cleaning up files. 164 func (wal *BaseWAL) OnStop() { 165 wal.flushTicker.Stop() 166 if err := wal.FlushAndSync(); err != nil { 167 wal.logger.Error("error on flush data to disk", "error", err) 168 } 169 wal.group.Stop() 170 wal.group.Close() 171 } 172 173 // Wait for the underlying autofile group to finish shutting down 174 // so it's safe to cleanup files. 175 func (wal *BaseWAL) Wait() { 176 if wal.IsRunning() { 177 wal.BaseService.Wait() 178 } 179 if wal.group.IsRunning() { 180 wal.group.Wait() 181 } 182 } 183 184 // Write is called in newStep and for each receive on the 185 // peerMsgQueue and the timeoutTicker. 186 // NOTE: does not call fsync() 187 func (wal *BaseWAL) Write(msg WALMessage) error { 188 if wal == nil { 189 return nil 190 } 191 192 if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil { 193 wal.logger.Error("error writing msg to consensus wal. WARNING: recover may not be possible for the current height", 194 "err", err, "msg", msg) 195 return err 196 } 197 198 return nil 199 } 200 201 // WriteSync is called when we receive a msg from ourselves 202 // so that we write to disk before sending signed messages. 203 // NOTE: calls fsync() 204 func (wal *BaseWAL) WriteSync(msg WALMessage) error { 205 if wal == nil { 206 return nil 207 } 208 209 if err := wal.Write(msg); err != nil { 210 return err 211 } 212 213 if err := wal.FlushAndSync(); err != nil { 214 wal.logger.Error(`WriteSync failed to flush consensus wal. 215 WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`, 216 "err", err) 217 return err 218 } 219 220 return nil 221 } 222 223 // WALSearchOptions are optional arguments to SearchForEndHeight. 224 type WALSearchOptions struct { 225 // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. 226 IgnoreDataCorruptionErrors bool 227 } 228 229 // SearchForEndHeight searches for the EndHeightMessage with the given height 230 // and returns an auto.GroupReader, whenever it was found or not and an error. 231 // Group reader will be nil if found equals false. 232 // 233 // CONTRACT: caller must close group reader. 234 func (wal *BaseWAL) SearchForEndHeight( 235 height int64, 236 options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 237 var ( 238 msg *TimedWALMessage 239 gr *auto.GroupReader 240 ) 241 lastHeightFound := int64(-1) 242 243 // NOTE: starting from the last file in the group because we're usually 244 // searching for the last height. See replay.go 245 min, max := wal.group.MinIndex(), wal.group.MaxIndex() 246 wal.logger.Info("Searching for height", "height", height, "min", min, "max", max) 247 for index := max; index >= min; index-- { 248 gr, err = wal.group.NewReader(index) 249 if err != nil { 250 return nil, false, err 251 } 252 253 dec := NewWALDecoder(gr) 254 for { 255 msg, err = dec.Decode() 256 if err == io.EOF { 257 // OPTIMISATION: no need to look for height in older files if we've seen h < height 258 if lastHeightFound > 0 && lastHeightFound < height { 259 gr.Close() 260 return nil, false, nil 261 } 262 // check next file 263 break 264 } 265 if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { 266 wal.logger.Error("Corrupted entry. Skipping...", "err", err) 267 // do nothing 268 continue 269 } else if err != nil { 270 gr.Close() 271 return nil, false, err 272 } 273 274 if m, ok := msg.Msg.(EndHeightMessage); ok { 275 lastHeightFound = m.Height 276 if m.Height == height { // found 277 wal.logger.Info("Found", "height", height, "index", index) 278 return gr, true, nil 279 } 280 } 281 } 282 gr.Close() 283 } 284 285 return nil, false, nil 286 } 287 288 // A WALEncoder writes custom-encoded WAL messages to an output stream. 289 // 290 // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value 291 type WALEncoder struct { 292 wr io.Writer 293 } 294 295 // NewWALEncoder returns a new encoder that writes to wr. 296 func NewWALEncoder(wr io.Writer) *WALEncoder { 297 return &WALEncoder{wr} 298 } 299 300 // Encode writes the custom encoding of v to the stream. It returns an error if 301 // the encoded size of v is greater than 4MB. Any error encountered 302 // during the write is also returned. 303 func (enc *WALEncoder) Encode(v *TimedWALMessage) error { 304 pbMsg, err := WALToProto(v.Msg) 305 if err != nil { 306 return err 307 } 308 pv := tmcons.TimedWALMessage{ 309 Time: v.Time, 310 Msg: pbMsg, 311 } 312 313 data, err := proto.Marshal(&pv) 314 if err != nil { 315 panic(fmt.Errorf("encode timed wall message failure: %w", err)) 316 } 317 318 crc := crc32.Checksum(data, crc32c) 319 length := uint32(len(data)) 320 if length > maxMsgSizeBytes { 321 return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) 322 } 323 totalLength := 8 + int(length) 324 325 msg := make([]byte, totalLength) 326 binary.BigEndian.PutUint32(msg[0:4], crc) 327 binary.BigEndian.PutUint32(msg[4:8], length) 328 copy(msg[8:], data) 329 330 _, err = enc.wr.Write(msg) 331 return err 332 } 333 334 // IsDataCorruptionError returns true if data has been corrupted inside WAL. 335 func IsDataCorruptionError(err error) bool { 336 _, ok := err.(DataCorruptionError) 337 return ok 338 } 339 340 // DataCorruptionError is an error that occures if data on disk was corrupted. 341 type DataCorruptionError struct { 342 cause error 343 } 344 345 func (e DataCorruptionError) Error() string { 346 return fmt.Sprintf("DataCorruptionError[%v]", e.cause) 347 } 348 349 func (e DataCorruptionError) Cause() error { 350 return e.cause 351 } 352 353 // A WALDecoder reads and decodes custom-encoded WAL messages from an input 354 // stream. See WALEncoder for the format used. 355 // 356 // It will also compare the checksums and make sure data size is equal to the 357 // length from the header. If that is not the case, error will be returned. 358 type WALDecoder struct { 359 rd io.Reader 360 } 361 362 // NewWALDecoder returns a new decoder that reads from rd. 363 func NewWALDecoder(rd io.Reader) *WALDecoder { 364 return &WALDecoder{rd} 365 } 366 367 // Decode reads the next custom-encoded value from its reader and returns it. 368 func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { 369 b := make([]byte, 4) 370 371 _, err := dec.rd.Read(b) 372 if errors.Is(err, io.EOF) { 373 return nil, err 374 } 375 if err != nil { 376 return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %w", err)} 377 } 378 crc := binary.BigEndian.Uint32(b) 379 380 b = make([]byte, 4) 381 _, err = dec.rd.Read(b) 382 if err != nil { 383 return nil, DataCorruptionError{fmt.Errorf("failed to read length: %w", err)} 384 } 385 length := binary.BigEndian.Uint32(b) 386 387 if length > maxMsgSizeBytes { 388 return nil, DataCorruptionError{fmt.Errorf( 389 "length %d exceeded maximum possible value of %d bytes", 390 length, 391 maxMsgSizeBytes)} 392 } 393 394 data := make([]byte, length) 395 n, err := dec.rd.Read(data) 396 if err != nil { 397 return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} 398 } 399 400 // check checksum before decoding data 401 actualCRC := crc32.Checksum(data, crc32c) 402 if actualCRC != crc { 403 return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} 404 } 405 406 var res = new(tmcons.TimedWALMessage) 407 err = proto.Unmarshal(data, res) 408 if err != nil { 409 return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %w", err)} 410 } 411 412 walMsg, err := WALFromProto(res.Msg) 413 if err != nil { 414 return nil, DataCorruptionError{fmt.Errorf("failed to convert from proto: %w", err)} 415 } 416 tMsgWal := &TimedWALMessage{ 417 Time: res.Time, 418 Msg: walMsg, 419 } 420 421 return tMsgWal, err 422 } 423 424 type nilWAL struct{} 425 426 var _ WAL = nilWAL{} 427 428 func (nilWAL) Write(m WALMessage) error { return nil } 429 func (nilWAL) WriteSync(m WALMessage) error { return nil } 430 func (nilWAL) FlushAndSync() error { return nil } 431 func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 432 return nil, false, nil 433 } 434 func (nilWAL) Start(context.Context) error { return nil } 435 func (nilWAL) Stop() {} 436 func (nilWAL) Wait() {}