github.com/516108736/tendermint@v0.36.0/consensus/wal.go (about) 1 package consensus 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "fmt" 7 "hash/crc32" 8 "io" 9 "path/filepath" 10 "time" 11 12 "github.com/gogo/protobuf/proto" 13 14 auto "github.com/tendermint/tendermint/libs/autofile" 15 tmjson "github.com/tendermint/tendermint/libs/json" 16 "github.com/tendermint/tendermint/libs/log" 17 tmos "github.com/tendermint/tendermint/libs/os" 18 "github.com/tendermint/tendermint/libs/service" 19 tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus" 20 tmtime "github.com/tendermint/tendermint/types/time" 21 ) 22 23 const ( 24 // time.Time + max consensus msg size 25 maxMsgSizeBytes = maxMsgSize + 24 26 27 // how often the WAL should be sync'd during period sync'ing 28 walDefaultFlushInterval = 2 * time.Second 29 ) 30 31 //-------------------------------------------------------- 32 // types and functions for savings consensus messages 33 34 // TimedWALMessage wraps WALMessage and adds Time for debugging purposes. 35 type TimedWALMessage struct { 36 Time time.Time `json:"time"` 37 Msg WALMessage `json:"msg"` 38 } 39 40 // EndHeightMessage marks the end of the given height inside WAL. 41 // @internal used by scripts/wal2json util. 42 type EndHeightMessage struct { 43 Height int64 `json:"height"` 44 } 45 46 type WALMessage interface{} 47 48 func init() { 49 tmjson.RegisterType(msgInfo{}, "tendermint/wal/MsgInfo") 50 tmjson.RegisterType(timeoutInfo{}, "tendermint/wal/TimeoutInfo") 51 tmjson.RegisterType(EndHeightMessage{}, "tendermint/wal/EndHeightMessage") 52 } 53 54 //-------------------------------------------------------- 55 // Simple write-ahead logger 56 57 // WAL is an interface for any write-ahead logger. 58 type WAL interface { 59 Write(WALMessage) error 60 WriteSync(WALMessage) error 61 FlushAndSync() error 62 63 SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) 64 65 // service methods 66 Start() error 67 Stop() error 68 Wait() 69 } 70 71 // Write ahead logger writes msgs to disk before they are processed. 72 // Can be used for crash-recovery and deterministic replay. 73 // TODO: currently the wal is overwritten during replay catchup, give it a mode 74 // so it's either reading or appending - must read to end to start appending 75 // again. 76 type BaseWAL struct { 77 service.BaseService 78 79 group *auto.Group 80 81 enc *WALEncoder 82 83 flushTicker *time.Ticker 84 flushInterval time.Duration 85 } 86 87 var _ WAL = &BaseWAL{} 88 89 // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements 90 // WAL. It's flushed and synced to disk every 2s and once when stopped. 91 func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) { 92 err := tmos.EnsureDir(filepath.Dir(walFile), 0700) 93 if err != nil { 94 return nil, fmt.Errorf("failed to ensure WAL directory is in place: %w", err) 95 } 96 97 group, err := auto.OpenGroup(walFile, groupOptions...) 98 if err != nil { 99 return nil, err 100 } 101 wal := &BaseWAL{ 102 group: group, 103 enc: NewWALEncoder(group), 104 flushInterval: walDefaultFlushInterval, 105 } 106 wal.BaseService = *service.NewBaseService(nil, "baseWAL", wal) 107 return wal, nil 108 } 109 110 // SetFlushInterval allows us to override the periodic flush interval for the WAL. 111 func (wal *BaseWAL) SetFlushInterval(i time.Duration) { 112 wal.flushInterval = i 113 } 114 115 func (wal *BaseWAL) Group() *auto.Group { 116 return wal.group 117 } 118 119 func (wal *BaseWAL) SetLogger(l log.Logger) { 120 wal.BaseService.Logger = l 121 wal.group.SetLogger(l) 122 } 123 124 func (wal *BaseWAL) OnStart() error { 125 size, err := wal.group.Head.Size() 126 if err != nil { 127 return err 128 } else if size == 0 { 129 if err := wal.WriteSync(EndHeightMessage{0}); err != nil { 130 return err 131 } 132 } 133 err = wal.group.Start() 134 if err != nil { 135 return err 136 } 137 wal.flushTicker = time.NewTicker(wal.flushInterval) 138 go wal.processFlushTicks() 139 return nil 140 } 141 142 func (wal *BaseWAL) processFlushTicks() { 143 for { 144 select { 145 case <-wal.flushTicker.C: 146 if err := wal.FlushAndSync(); err != nil { 147 wal.Logger.Error("Periodic WAL flush failed", "err", err) 148 } 149 case <-wal.Quit(): 150 return 151 } 152 } 153 } 154 155 // FlushAndSync flushes and fsync's the underlying group's data to disk. 156 // See auto#FlushAndSync 157 func (wal *BaseWAL) FlushAndSync() error { 158 return wal.group.FlushAndSync() 159 } 160 161 // Stop the underlying autofile group. 162 // Use Wait() to ensure it's finished shutting down 163 // before cleaning up files. 164 func (wal *BaseWAL) OnStop() { 165 wal.flushTicker.Stop() 166 if err := wal.FlushAndSync(); err != nil { 167 wal.Logger.Error("error on flush data to disk", "error", err) 168 } 169 if err := wal.group.Stop(); err != nil { 170 wal.Logger.Error("error trying to stop wal", "error", err) 171 } 172 wal.group.Close() 173 } 174 175 // Wait for the underlying autofile group to finish shutting down 176 // so it's safe to cleanup files. 177 func (wal *BaseWAL) Wait() { 178 wal.group.Wait() 179 } 180 181 // Write is called in newStep and for each receive on the 182 // peerMsgQueue and the timeoutTicker. 183 // NOTE: does not call fsync() 184 func (wal *BaseWAL) Write(msg WALMessage) error { 185 if wal == nil { 186 return nil 187 } 188 189 if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil { 190 wal.Logger.Error("Error writing msg to consensus wal. WARNING: recover may not be possible for the current height", 191 "err", err, "msg", msg) 192 return err 193 } 194 195 return nil 196 } 197 198 // WriteSync is called when we receive a msg from ourselves 199 // so that we write to disk before sending signed messages. 200 // NOTE: calls fsync() 201 func (wal *BaseWAL) WriteSync(msg WALMessage) error { 202 if wal == nil { 203 return nil 204 } 205 206 if err := wal.Write(msg); err != nil { 207 return err 208 } 209 210 if err := wal.FlushAndSync(); err != nil { 211 wal.Logger.Error(`WriteSync failed to flush consensus wal. 212 WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`, 213 "err", err) 214 return err 215 } 216 217 return nil 218 } 219 220 // WALSearchOptions are optional arguments to SearchForEndHeight. 221 type WALSearchOptions struct { 222 // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. 223 IgnoreDataCorruptionErrors bool 224 } 225 226 // SearchForEndHeight searches for the EndHeightMessage with the given height 227 // and returns an auto.GroupReader, whenever it was found or not and an error. 228 // Group reader will be nil if found equals false. 229 // 230 // CONTRACT: caller must close group reader. 231 func (wal *BaseWAL) SearchForEndHeight( 232 height int64, 233 options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 234 var ( 235 msg *TimedWALMessage 236 gr *auto.GroupReader 237 ) 238 lastHeightFound := int64(-1) 239 240 // NOTE: starting from the last file in the group because we're usually 241 // searching for the last height. See replay.go 242 min, max := wal.group.MinIndex(), wal.group.MaxIndex() 243 wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) 244 for index := max; index >= min; index-- { 245 gr, err = wal.group.NewReader(index) 246 if err != nil { 247 return nil, false, err 248 } 249 250 dec := NewWALDecoder(gr) 251 for { 252 msg, err = dec.Decode() 253 if err == io.EOF { 254 // OPTIMISATION: no need to look for height in older files if we've seen h < height 255 if lastHeightFound > 0 && lastHeightFound < height { 256 gr.Close() 257 return nil, false, nil 258 } 259 // check next file 260 break 261 } 262 if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { 263 wal.Logger.Error("Corrupted entry. Skipping...", "err", err) 264 // do nothing 265 continue 266 } else if err != nil { 267 gr.Close() 268 return nil, false, err 269 } 270 271 if m, ok := msg.Msg.(EndHeightMessage); ok { 272 lastHeightFound = m.Height 273 if m.Height == height { // found 274 wal.Logger.Info("Found", "height", height, "index", index) 275 return gr, true, nil 276 } 277 } 278 } 279 gr.Close() 280 } 281 282 return nil, false, nil 283 } 284 285 // A WALEncoder writes custom-encoded WAL messages to an output stream. 286 // 287 // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value 288 type WALEncoder struct { 289 wr io.Writer 290 } 291 292 // NewWALEncoder returns a new encoder that writes to wr. 293 func NewWALEncoder(wr io.Writer) *WALEncoder { 294 return &WALEncoder{wr} 295 } 296 297 // Encode writes the custom encoding of v to the stream. It returns an error if 298 // the encoded size of v is greater than 1MB. Any error encountered 299 // during the write is also returned. 300 func (enc *WALEncoder) Encode(v *TimedWALMessage) error { 301 pbMsg, err := WALToProto(v.Msg) 302 if err != nil { 303 return err 304 } 305 pv := tmcons.TimedWALMessage{ 306 Time: v.Time, 307 Msg: pbMsg, 308 } 309 310 data, err := proto.Marshal(&pv) 311 if err != nil { 312 panic(fmt.Errorf("encode timed wall message failure: %w", err)) 313 } 314 315 crc := crc32.Checksum(data, crc32c) 316 length := uint32(len(data)) 317 if length > maxMsgSizeBytes { 318 return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) 319 } 320 totalLength := 8 + int(length) 321 322 msg := make([]byte, totalLength) 323 binary.BigEndian.PutUint32(msg[0:4], crc) 324 binary.BigEndian.PutUint32(msg[4:8], length) 325 copy(msg[8:], data) 326 327 _, err = enc.wr.Write(msg) 328 return err 329 } 330 331 // IsDataCorruptionError returns true if data has been corrupted inside WAL. 332 func IsDataCorruptionError(err error) bool { 333 _, ok := err.(DataCorruptionError) 334 return ok 335 } 336 337 // DataCorruptionError is an error that occures if data on disk was corrupted. 338 type DataCorruptionError struct { 339 cause error 340 } 341 342 func (e DataCorruptionError) Error() string { 343 return fmt.Sprintf("DataCorruptionError[%v]", e.cause) 344 } 345 346 func (e DataCorruptionError) Cause() error { 347 return e.cause 348 } 349 350 // A WALDecoder reads and decodes custom-encoded WAL messages from an input 351 // stream. See WALEncoder for the format used. 352 // 353 // It will also compare the checksums and make sure data size is equal to the 354 // length from the header. If that is not the case, error will be returned. 355 type WALDecoder struct { 356 rd io.Reader 357 } 358 359 // NewWALDecoder returns a new decoder that reads from rd. 360 func NewWALDecoder(rd io.Reader) *WALDecoder { 361 return &WALDecoder{rd} 362 } 363 364 // Decode reads the next custom-encoded value from its reader and returns it. 365 func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { 366 b := make([]byte, 4) 367 368 _, err := dec.rd.Read(b) 369 if errors.Is(err, io.EOF) { 370 return nil, err 371 } 372 if err != nil { 373 return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} 374 } 375 crc := binary.BigEndian.Uint32(b) 376 377 b = make([]byte, 4) 378 _, err = dec.rd.Read(b) 379 if err != nil { 380 return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} 381 } 382 length := binary.BigEndian.Uint32(b) 383 384 if length > maxMsgSizeBytes { 385 return nil, DataCorruptionError{fmt.Errorf( 386 "length %d exceeded maximum possible value of %d bytes", 387 length, 388 maxMsgSizeBytes)} 389 } 390 391 data := make([]byte, length) 392 n, err := dec.rd.Read(data) 393 if err != nil { 394 return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} 395 } 396 397 // check checksum before decoding data 398 actualCRC := crc32.Checksum(data, crc32c) 399 if actualCRC != crc { 400 return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} 401 } 402 403 var res = new(tmcons.TimedWALMessage) 404 err = proto.Unmarshal(data, res) 405 if err != nil { 406 return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} 407 } 408 409 walMsg, err := WALFromProto(res.Msg) 410 if err != nil { 411 return nil, DataCorruptionError{fmt.Errorf("failed to convert from proto: %w", err)} 412 } 413 tMsgWal := &TimedWALMessage{ 414 Time: res.Time, 415 Msg: walMsg, 416 } 417 418 return tMsgWal, err 419 } 420 421 type nilWAL struct{} 422 423 var _ WAL = nilWAL{} 424 425 func (nilWAL) Write(m WALMessage) error { return nil } 426 func (nilWAL) WriteSync(m WALMessage) error { return nil } 427 func (nilWAL) FlushAndSync() error { return nil } 428 func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 429 return nil, false, nil 430 } 431 func (nilWAL) Start() error { return nil } 432 func (nilWAL) Stop() error { return nil } 433 func (nilWAL) Wait() {}