github.com/okex/exchain@v1.8.0/libs/tendermint/consensus/wal.go (about) 1 package consensus 2 3 import ( 4 "encoding/binary" 5 "fmt" 6 "hash/crc32" 7 "io" 8 "path/filepath" 9 "time" 10 11 "github.com/pkg/errors" 12 13 amino "github.com/tendermint/go-amino" 14 15 auto "github.com/okex/exchain/libs/tendermint/libs/autofile" 16 "github.com/okex/exchain/libs/tendermint/libs/log" 17 tmos "github.com/okex/exchain/libs/tendermint/libs/os" 18 "github.com/okex/exchain/libs/tendermint/libs/service" 19 "github.com/okex/exchain/libs/tendermint/types" 20 tmtime "github.com/okex/exchain/libs/tendermint/types/time" 21 ) 22 23 const ( 24 // amino overhead + time.Time + max consensus msg size 25 // 26 // q: where 24 bytes are coming from? 27 // a: cdc.MustMarshalBinaryBare(empty consensus part msg) = 14 bytes. +10 28 // bytes just in case amino will require more space in the future. 29 maxMsgSizeBytes = maxMsgSize + 24 30 31 // how often the WAL should be sync'd during period sync'ing 32 walDefaultFlushInterval = 2 * time.Second 33 34 // if write wal time is more than walAlertTime, should log error 35 walAlertTime = 500 * time.Millisecond 36 ) 37 38 //-------------------------------------------------------- 39 // types and functions for savings consensus messages 40 41 // TimedWALMessage wraps WALMessage and adds Time for debugging purposes. 42 type TimedWALMessage struct { 43 Time time.Time `json:"time"` 44 Msg WALMessage `json:"msg"` 45 } 46 47 // EndHeightMessage marks the end of the given height inside WAL. 48 // @internal used by scripts/wal2json util. 
49 type EndHeightMessage struct { 50 Height int64 `json:"height"` 51 } 52 53 type WALMessage interface{} 54 55 func RegisterWALMessages(cdc *amino.Codec) { 56 cdc.RegisterInterface((*WALMessage)(nil), nil) 57 cdc.RegisterConcrete(types.EventDataRoundState{}, "tendermint/wal/EventDataRoundState", nil) 58 cdc.RegisterConcrete(msgInfo{}, "tendermint/wal/MsgInfo", nil) 59 cdc.RegisterConcrete(timeoutInfo{}, "tendermint/wal/TimeoutInfo", nil) 60 cdc.RegisterConcrete(EndHeightMessage{}, "tendermint/wal/EndHeightMessage", nil) 61 } 62 63 //-------------------------------------------------------- 64 // Simple write-ahead logger 65 66 // WAL is an interface for any write-ahead logger. 67 type WAL interface { 68 Write(WALMessage) error 69 WriteSync(WALMessage) error 70 FlushAndSync() error 71 72 SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) 73 74 // service methods 75 Start() error 76 Reset() error 77 Stop() error 78 Wait() 79 } 80 81 // Write ahead logger writes msgs to disk before they are processed. 82 // Can be used for crash-recovery and deterministic replay. 83 // TODO: currently the wal is overwritten during replay catchup, give it a mode 84 // so it's either reading or appending - must read to end to start appending 85 // again. 86 type BaseWAL struct { 87 service.BaseService 88 89 group *auto.Group 90 91 enc *WALEncoder 92 93 flushTicker *time.Ticker 94 flushInterval time.Duration 95 } 96 97 var _ WAL = &BaseWAL{} 98 99 // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements 100 // WAL. It's flushed and synced to disk every 2s and once when stopped. 101 func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) { 102 err := tmos.EnsureDir(filepath.Dir(walFile), 0700) 103 if err != nil { 104 return nil, errors.Wrap(err, "failed to ensure WAL directory is in place") 105 } 106 107 group, err := auto.OpenGroup(walFile, groupOptions...) 
108 if err != nil { 109 return nil, err 110 } 111 wal := &BaseWAL{ 112 group: group, 113 enc: NewWALEncoder(group), 114 flushInterval: walDefaultFlushInterval, 115 } 116 wal.BaseService = *service.NewBaseService(nil, "baseWAL", wal) 117 return wal, nil 118 } 119 120 // SetFlushInterval allows us to override the periodic flush interval for the WAL. 121 func (wal *BaseWAL) SetFlushInterval(i time.Duration) { 122 wal.flushInterval = i 123 } 124 125 func (wal *BaseWAL) Group() *auto.Group { 126 return wal.group 127 } 128 129 func (wal *BaseWAL) SetLogger(l log.Logger) { 130 wal.BaseService.Logger = l 131 wal.group.SetLogger(l) 132 } 133 134 func (wal *BaseWAL) OnStart() error { 135 size, err := wal.group.Head.Size() 136 if err != nil { 137 return err 138 } else if size == 0 { 139 wal.WriteSync(EndHeightMessage{types.GetStartBlockHeight()}) 140 } 141 err = wal.group.Start() 142 if err != nil { 143 return err 144 } 145 wal.flushTicker = time.NewTicker(wal.flushInterval) 146 go wal.processFlushTicks() 147 return nil 148 } 149 150 func (wal *BaseWAL) OnReset() error { 151 //size, err := wal.group.Head.Size() 152 //if err != nil { 153 // return err 154 //} else if size == 0 { 155 // wal.WriteSync(EndHeightMessage{types.GetStartBlockHeight()}) 156 //} 157 err := wal.group.Reset() 158 if err != nil { 159 return err 160 } 161 //wal.flushTicker.Reset(wal.flushInterval) 162 //go wal.processFlushTicks() 163 164 return nil 165 } 166 167 func (wal *BaseWAL) processFlushTicks() { 168 for { 169 select { 170 case <-wal.flushTicker.C: 171 if err := wal.FlushAndSync(); err != nil { 172 wal.Logger.Error("Periodic WAL flush failed", "err", err) 173 } 174 case <-wal.Quit(): 175 return 176 } 177 } 178 } 179 180 // FlushAndSync flushes and fsync's the underlying group's data to disk. 181 // See auto#FlushAndSync 182 func (wal *BaseWAL) FlushAndSync() error { 183 return wal.group.FlushAndSync() 184 } 185 186 // Stop the underlying autofile group. 
187 // Use Wait() to ensure it's finished shutting down 188 // before cleaning up files. 189 func (wal *BaseWAL) OnStop() { 190 wal.flushTicker.Stop() 191 wal.FlushAndSync() 192 wal.group.Stop() 193 wal.group.Close() 194 } 195 196 // Wait for the underlying autofile group to finish shutting down 197 // so it's safe to cleanup files. 198 func (wal *BaseWAL) Wait() { 199 wal.group.Wait() 200 } 201 202 // Write is called in newStep and for each receive on the 203 // peerMsgQueue and the timeoutTicker. 204 // NOTE: does not call fsync() 205 func (wal *BaseWAL) Write(msg WALMessage) error { 206 t0 := tmtime.Now() 207 if wal == nil { 208 return nil 209 } 210 211 if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil { 212 wal.Logger.Error("Error writing msg to consensus wal. WARNING: recover may not be possible for the current height", 213 "err", err, "msg", msg) 214 return err 215 } 216 if t := tmtime.Now().Sub(t0); t > walAlertTime { 217 wal.Logger.Error("WAL Write Message", "time", t, "msg", msg) 218 } 219 220 return nil 221 } 222 223 // WriteSync is called when we receive a msg from ourselves 224 // so that we write to disk before sending signed messages. 225 // NOTE: calls fsync() 226 func (wal *BaseWAL) WriteSync(msg WALMessage) error { 227 t0 := tmtime.Now() 228 if wal == nil { 229 return nil 230 } 231 232 if err := wal.Write(msg); err != nil { 233 return err 234 } 235 236 if err := wal.FlushAndSync(); err != nil { 237 wal.Logger.Error(`WriteSync failed to flush consensus wal. 238 WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`, 239 "err", err) 240 return err 241 } 242 if t := tmtime.Now().Sub(t0); t > walAlertTime { 243 wal.Logger.Error("WriteSync WAL", "time", t, "msg", msg) 244 } 245 246 return nil 247 } 248 249 // WALSearchOptions are optional arguments to SearchForEndHeight. 
250 type WALSearchOptions struct { 251 // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. 252 IgnoreDataCorruptionErrors bool 253 } 254 255 // SearchForEndHeight searches for the EndHeightMessage with the given height 256 // and returns an auto.GroupReader, whenever it was found or not and an error. 257 // Group reader will be nil if found equals false. 258 // 259 // CONTRACT: caller must close group reader. 260 func (wal *BaseWAL) SearchForEndHeight( 261 height int64, 262 options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 263 var ( 264 msg *TimedWALMessage 265 gr *auto.GroupReader 266 ) 267 lastHeightFound := int64(-1) 268 269 // NOTE: starting from the last file in the group because we're usually 270 // searching for the last height. See replay.go 271 min, max := wal.group.MinIndex(), wal.group.MaxIndex() 272 wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) 273 for index := max; index >= min; index-- { 274 gr, err = wal.group.NewReader(index) 275 if err != nil { 276 return nil, false, err 277 } 278 279 dec := NewWALDecoder(gr) 280 for { 281 msg, err = dec.Decode() 282 if err == io.EOF { 283 // OPTIMISATION: no need to look for height in older files if we've seen h < height 284 if lastHeightFound > 0 && lastHeightFound < height { 285 gr.Close() 286 return nil, false, nil 287 } 288 // check next file 289 break 290 } 291 if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { 292 wal.Logger.Error("Corrupted entry. 
Skipping...", "err", err) 293 // do nothing 294 continue 295 } else if err != nil { 296 gr.Close() 297 return nil, false, err 298 } 299 300 if m, ok := msg.Msg.(EndHeightMessage); ok { 301 lastHeightFound = m.Height 302 if m.Height == height { // found 303 wal.Logger.Info("Found", "height", height, "index", index) 304 return gr, true, nil 305 } 306 } 307 } 308 gr.Close() 309 } 310 311 return nil, false, nil 312 } 313 314 /////////////////////////////////////////////////////////////////////////////// 315 316 // A WALEncoder writes custom-encoded WAL messages to an output stream. 317 // 318 // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value (go-amino encoded) 319 type WALEncoder struct { 320 wr io.Writer 321 } 322 323 // NewWALEncoder returns a new encoder that writes to wr. 324 func NewWALEncoder(wr io.Writer) *WALEncoder { 325 return &WALEncoder{wr} 326 } 327 328 // Encode writes the custom encoding of v to the stream. It returns an error if 329 // the amino-encoded size of v is greater than 1MB. Any error encountered 330 // during the write is also returned. 331 func (enc *WALEncoder) Encode(v *TimedWALMessage) error { 332 data := cdc.MustMarshalBinaryBare(v) 333 334 crc := crc32.Checksum(data, crc32c) 335 length := uint32(len(data)) 336 if length > maxMsgSizeBytes { 337 return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) 338 } 339 totalLength := 8 + int(length) 340 341 msg := make([]byte, totalLength) 342 binary.BigEndian.PutUint32(msg[0:4], crc) 343 binary.BigEndian.PutUint32(msg[4:8], length) 344 copy(msg[8:], data) 345 346 _, err := enc.wr.Write(msg) 347 return err 348 } 349 350 /////////////////////////////////////////////////////////////////////////////// 351 352 // IsDataCorruptionError returns true if data has been corrupted inside WAL. 
func IsDataCorruptionError(err error) bool {
	// Only a DataCorruptionError value matches; a nil error, a pointer to
	// one, or any other error type does not.
	_, ok := err.(DataCorruptionError)
	return ok
}

// DataCorruptionError is an error that occurs if data on disk was corrupted.
type DataCorruptionError struct {
	cause error
}

// Error implements the error interface; the message embeds the cause.
func (e DataCorruptionError) Error() string {
	return fmt.Sprintf("DataCorruptionError[%v]", e.cause)
}

// Cause returns the underlying error that was classified as data corruption.
func (e DataCorruptionError) Cause() error {
	return e.cause
}

// A WALDecoder reads and decodes custom-encoded WAL messages from an input
// stream. See WALEncoder for the format used.
//
// It will also compare the checksums and make sure data size is equal to the
// length from the header. If that is not the case, error will be returned.
type WALDecoder struct {
	rd io.Reader
}

// NewWALDecoder returns a new decoder that reads from rd.
func NewWALDecoder(rd io.Reader) *WALDecoder {
	return &WALDecoder{rd}
}

// Decode reads the next custom-encoded value from its reader and returns it.
386 func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { 387 b := make([]byte, 4) 388 389 _, err := dec.rd.Read(b) 390 if err == io.EOF { 391 return nil, err 392 } 393 if err != nil { 394 return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} 395 } 396 crc := binary.BigEndian.Uint32(b) 397 398 b = make([]byte, 4) 399 _, err = dec.rd.Read(b) 400 if err != nil { 401 return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} 402 } 403 length := binary.BigEndian.Uint32(b) 404 405 if length > maxMsgSizeBytes { 406 return nil, DataCorruptionError{fmt.Errorf( 407 "length %d exceeded maximum possible value of %d bytes", 408 length, 409 maxMsgSizeBytes)} 410 } 411 412 data := make([]byte, length) 413 n, err := dec.rd.Read(data) 414 if err != nil { 415 return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} 416 } 417 418 // check checksum before decoding data 419 actualCRC := crc32.Checksum(data, crc32c) 420 if actualCRC != crc { 421 return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} 422 } 423 424 var res = new(TimedWALMessage) // nolint: gosimple 425 err = cdc.UnmarshalBinaryBare(data, res) 426 if err != nil { 427 return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} 428 } 429 430 return res, err 431 } 432 433 type nilWAL struct{} 434 435 var _ WAL = nilWAL{} 436 437 func (nilWAL) Write(m WALMessage) error { return nil } 438 func (nilWAL) WriteSync(m WALMessage) error { return nil } 439 func (nilWAL) FlushAndSync() error { return nil } 440 func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 441 return nil, false, nil 442 } 443 func (nilWAL) Start() error { return nil } 444 func (nilWAL) Reset() error { return nil } 445 func (nilWAL) Stop() error { return nil } 446 func (nilWAL) Wait() {}