github.com/evdatsion/aphelion-dpos-bft@v0.32.1/consensus/wal.go (about) 1 package consensus 2 3 import ( 4 "encoding/binary" 5 "fmt" 6 "hash/crc32" 7 "io" 8 "path/filepath" 9 "time" 10 11 "github.com/pkg/errors" 12 13 amino "github.com/evdatsion/go-amino" 14 auto "github.com/evdatsion/aphelion-dpos-bft/libs/autofile" 15 cmn "github.com/evdatsion/aphelion-dpos-bft/libs/common" 16 "github.com/evdatsion/aphelion-dpos-bft/libs/log" 17 "github.com/evdatsion/aphelion-dpos-bft/types" 18 tmtime "github.com/evdatsion/aphelion-dpos-bft/types/time" 19 ) 20 21 const ( 22 // must be greater than types.BlockPartSizeBytes + a few bytes 23 maxMsgSizeBytes = 1024 * 1024 // 1MB 24 25 // how often the WAL should be sync'd during period sync'ing 26 walDefaultFlushInterval = 2 * time.Second 27 ) 28 29 //-------------------------------------------------------- 30 // types and functions for savings consensus messages 31 32 type TimedWALMessage struct { 33 Time time.Time `json:"time"` // for debugging purposes 34 Msg WALMessage `json:"msg"` 35 } 36 37 // EndHeightMessage marks the end of the given height inside WAL. 38 // @internal used by scripts/wal2json util. 39 type EndHeightMessage struct { 40 Height int64 `json:"height"` 41 } 42 43 type WALMessage interface{} 44 45 func RegisterWALMessages(cdc *amino.Codec) { 46 cdc.RegisterInterface((*WALMessage)(nil), nil) 47 cdc.RegisterConcrete(types.EventDataRoundState{}, "tendermint/wal/EventDataRoundState", nil) 48 cdc.RegisterConcrete(msgInfo{}, "tendermint/wal/MsgInfo", nil) 49 cdc.RegisterConcrete(timeoutInfo{}, "tendermint/wal/TimeoutInfo", nil) 50 cdc.RegisterConcrete(EndHeightMessage{}, "tendermint/wal/EndHeightMessage", nil) 51 } 52 53 //-------------------------------------------------------- 54 // Simple write-ahead logger 55 56 // WAL is an interface for any write-ahead logger. 57 type WAL interface { 58 Write(WALMessage) 59 WriteSync(WALMessage) 60 FlushAndSync() error 61 62 SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) 63 64 // service methods 65 Start() error 66 Stop() error 67 Wait() 68 } 69 70 // Write ahead logger writes msgs to disk before they are processed. 71 // Can be used for crash-recovery and deterministic replay. 72 // TODO: currently the wal is overwritten during replay catchup, give it a mode 73 // so it's either reading or appending - must read to end to start appending 74 // again. 75 type baseWAL struct { 76 cmn.BaseService 77 78 group *auto.Group 79 80 enc *WALEncoder 81 82 flushTicker *time.Ticker 83 flushInterval time.Duration 84 } 85 86 var _ WAL = &baseWAL{} 87 88 // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements 89 // WAL. It's flushed and synced to disk every 2s and once when stopped. 90 func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*baseWAL, error) { 91 err := cmn.EnsureDir(filepath.Dir(walFile), 0700) 92 if err != nil { 93 return nil, errors.Wrap(err, "failed to ensure WAL directory is in place") 94 } 95 96 group, err := auto.OpenGroup(walFile, groupOptions...) 97 if err != nil { 98 return nil, err 99 } 100 wal := &baseWAL{ 101 group: group, 102 enc: NewWALEncoder(group), 103 flushInterval: walDefaultFlushInterval, 104 } 105 wal.BaseService = *cmn.NewBaseService(nil, "baseWAL", wal) 106 return wal, nil 107 } 108 109 // SetFlushInterval allows us to override the periodic flush interval for the WAL. 110 func (wal *baseWAL) SetFlushInterval(i time.Duration) { 111 wal.flushInterval = i 112 } 113 114 func (wal *baseWAL) Group() *auto.Group { 115 return wal.group 116 } 117 118 func (wal *baseWAL) SetLogger(l log.Logger) { 119 wal.BaseService.Logger = l 120 wal.group.SetLogger(l) 121 } 122 123 func (wal *baseWAL) OnStart() error { 124 size, err := wal.group.Head.Size() 125 if err != nil { 126 return err 127 } else if size == 0 { 128 wal.WriteSync(EndHeightMessage{0}) 129 } 130 err = wal.group.Start() 131 if err != nil { 132 return err 133 } 134 wal.flushTicker = time.NewTicker(wal.flushInterval) 135 go wal.processFlushTicks() 136 return nil 137 } 138 139 func (wal *baseWAL) processFlushTicks() { 140 for { 141 select { 142 case <-wal.flushTicker.C: 143 if err := wal.FlushAndSync(); err != nil { 144 wal.Logger.Error("Periodic WAL flush failed", "err", err) 145 } 146 case <-wal.Quit(): 147 return 148 } 149 } 150 } 151 152 // FlushAndSync flushes and fsync's the underlying group's data to disk. 153 // See auto#FlushAndSync 154 func (wal *baseWAL) FlushAndSync() error { 155 return wal.group.FlushAndSync() 156 } 157 158 // Stop the underlying autofile group. 159 // Use Wait() to ensure it's finished shutting down 160 // before cleaning up files. 161 func (wal *baseWAL) OnStop() { 162 wal.flushTicker.Stop() 163 wal.FlushAndSync() 164 wal.group.Stop() 165 wal.group.Close() 166 } 167 168 // Wait for the underlying autofile group to finish shutting down 169 // so it's safe to cleanup files. 170 func (wal *baseWAL) Wait() { 171 wal.group.Wait() 172 } 173 174 // Write is called in newStep and for each receive on the 175 // peerMsgQueue and the timeoutTicker. 176 // NOTE: does not call fsync() 177 func (wal *baseWAL) Write(msg WALMessage) { 178 if wal == nil { 179 return 180 } 181 182 // Write the wal message 183 if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil { 184 panic(fmt.Sprintf("Error writing msg to consensus wal: %v \n\nMessage: %v", err, msg)) 185 } 186 } 187 188 // WriteSync is called when we receive a msg from ourselves 189 // so that we write to disk before sending signed messages. 190 // NOTE: calls fsync() 191 func (wal *baseWAL) WriteSync(msg WALMessage) { 192 if wal == nil { 193 return 194 } 195 196 wal.Write(msg) 197 if err := wal.FlushAndSync(); err != nil { 198 panic(fmt.Sprintf("Error flushing consensus wal buf to file. Error: %v \n", err)) 199 } 200 } 201 202 // WALSearchOptions are optional arguments to SearchForEndHeight. 203 type WALSearchOptions struct { 204 // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. 205 IgnoreDataCorruptionErrors bool 206 } 207 208 // SearchForEndHeight searches for the EndHeightMessage with the given height 209 // and returns an auto.GroupReader, whenever it was found or not and an error. 210 // Group reader will be nil if found equals false. 211 // 212 // CONTRACT: caller must close group reader. 213 func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 214 var ( 215 msg *TimedWALMessage 216 gr *auto.GroupReader 217 ) 218 lastHeightFound := int64(-1) 219 220 // NOTE: starting from the last file in the group because we're usually 221 // searching for the last height. See replay.go 222 min, max := wal.group.MinIndex(), wal.group.MaxIndex() 223 wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) 224 for index := max; index >= min; index-- { 225 gr, err = wal.group.NewReader(index) 226 if err != nil { 227 return nil, false, err 228 } 229 230 dec := NewWALDecoder(gr) 231 for { 232 msg, err = dec.Decode() 233 if err == io.EOF { 234 // OPTIMISATION: no need to look for height in older files if we've seen h < height 235 if lastHeightFound > 0 && lastHeightFound < height { 236 gr.Close() 237 return nil, false, nil 238 } 239 // check next file 240 break 241 } 242 if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { 243 wal.Logger.Error("Corrupted entry. Skipping...", "err", err) 244 // do nothing 245 continue 246 } else if err != nil { 247 gr.Close() 248 return nil, false, err 249 } 250 251 if m, ok := msg.Msg.(EndHeightMessage); ok { 252 lastHeightFound = m.Height 253 if m.Height == height { // found 254 wal.Logger.Info("Found", "height", height, "index", index) 255 return gr, true, nil 256 } 257 } 258 } 259 gr.Close() 260 } 261 262 return nil, false, nil 263 } 264 265 /////////////////////////////////////////////////////////////////////////////// 266 267 // A WALEncoder writes custom-encoded WAL messages to an output stream. 268 // 269 // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value (go-amino encoded) 270 type WALEncoder struct { 271 wr io.Writer 272 } 273 274 // NewWALEncoder returns a new encoder that writes to wr. 275 func NewWALEncoder(wr io.Writer) *WALEncoder { 276 return &WALEncoder{wr} 277 } 278 279 // Encode writes the custom encoding of v to the stream. It returns an error if 280 // the amino-encoded size of v is greater than 1MB. Any error encountered 281 // during the write is also returned. 282 func (enc *WALEncoder) Encode(v *TimedWALMessage) error { 283 data := cdc.MustMarshalBinaryBare(v) 284 285 crc := crc32.Checksum(data, crc32c) 286 length := uint32(len(data)) 287 if length > maxMsgSizeBytes { 288 return fmt.Errorf("Msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) 289 } 290 totalLength := 8 + int(length) 291 292 msg := make([]byte, totalLength) 293 binary.BigEndian.PutUint32(msg[0:4], crc) 294 binary.BigEndian.PutUint32(msg[4:8], length) 295 copy(msg[8:], data) 296 297 _, err := enc.wr.Write(msg) 298 299 return err 300 } 301 302 /////////////////////////////////////////////////////////////////////////////// 303 304 // IsDataCorruptionError returns true if data has been corrupted inside WAL. 305 func IsDataCorruptionError(err error) bool { 306 _, ok := err.(DataCorruptionError) 307 return ok 308 } 309 310 // DataCorruptionError is an error that occures if data on disk was corrupted. 311 type DataCorruptionError struct { 312 cause error 313 } 314 315 func (e DataCorruptionError) Error() string { 316 return fmt.Sprintf("DataCorruptionError[%v]", e.cause) 317 } 318 319 func (e DataCorruptionError) Cause() error { 320 return e.cause 321 } 322 323 // A WALDecoder reads and decodes custom-encoded WAL messages from an input 324 // stream. See WALEncoder for the format used. 325 // 326 // It will also compare the checksums and make sure data size is equal to the 327 // length from the header. If that is not the case, error will be returned. 328 type WALDecoder struct { 329 rd io.Reader 330 } 331 332 // NewWALDecoder returns a new decoder that reads from rd. 333 func NewWALDecoder(rd io.Reader) *WALDecoder { 334 return &WALDecoder{rd} 335 } 336 337 // Decode reads the next custom-encoded value from its reader and returns it. 338 func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { 339 b := make([]byte, 4) 340 341 _, err := dec.rd.Read(b) 342 if err == io.EOF { 343 return nil, err 344 } 345 if err != nil { 346 return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} 347 } 348 crc := binary.BigEndian.Uint32(b) 349 350 b = make([]byte, 4) 351 _, err = dec.rd.Read(b) 352 if err != nil { 353 return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} 354 } 355 length := binary.BigEndian.Uint32(b) 356 357 if length > maxMsgSizeBytes { 358 return nil, DataCorruptionError{fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes)} 359 } 360 361 data := make([]byte, length) 362 n, err := dec.rd.Read(data) 363 if err != nil { 364 return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} 365 } 366 367 // check checksum before decoding data 368 actualCRC := crc32.Checksum(data, crc32c) 369 if actualCRC != crc { 370 return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} 371 } 372 373 var res = new(TimedWALMessage) // nolint: gosimple 374 err = cdc.UnmarshalBinaryBare(data, res) 375 if err != nil { 376 return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} 377 } 378 379 return res, err 380 } 381 382 type nilWAL struct{} 383 384 var _ WAL = nilWAL{} 385 386 func (nilWAL) Write(m WALMessage) {} 387 func (nilWAL) WriteSync(m WALMessage) {} 388 func (nilWAL) FlushAndSync() error { return nil } 389 func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 390 return nil, false, nil 391 } 392 func (nilWAL) Start() error { return nil } 393 func (nilWAL) Stop() error { return nil } 394 func (nilWAL) Wait() {}