github.com/Finschia/ostracon@v1.1.5/consensus/wal.go (about) 1 package consensus 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "fmt" 7 "hash/crc32" 8 "io" 9 "path/filepath" 10 "time" 11 12 "github.com/gogo/protobuf/proto" 13 14 tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus" 15 16 auto "github.com/Finschia/ostracon/libs/autofile" 17 tmjson "github.com/Finschia/ostracon/libs/json" 18 "github.com/Finschia/ostracon/libs/log" 19 tmos "github.com/Finschia/ostracon/libs/os" 20 "github.com/Finschia/ostracon/libs/service" 21 tmtime "github.com/Finschia/ostracon/types/time" 22 ) 23 24 const ( 25 // time.Time + max consensus msg size 26 maxMsgSizeBytes = maxMsgSize + 24 27 28 // how often the WAL should be sync'd during period sync'ing 29 walDefaultFlushInterval = 2 * time.Second 30 ) 31 32 //-------------------------------------------------------- 33 // types and functions for savings consensus messages 34 35 // TimedWALMessage wraps WALMessage and adds Time for debugging purposes. 36 type TimedWALMessage struct { 37 Time time.Time `json:"time"` 38 Msg WALMessage `json:"msg"` 39 } 40 41 // EndHeightMessage marks the end of the given height inside WAL. 42 // @internal used by scripts/wal2json util. 43 type EndHeightMessage struct { 44 Height int64 `json:"height"` 45 } 46 47 type WALMessage interface{} 48 49 func init() { 50 tmjson.RegisterType(msgInfo{}, "ostracon/wal/MsgInfo") 51 tmjson.RegisterType(timeoutInfo{}, "ostracon/wal/TimeoutInfo") 52 tmjson.RegisterType(EndHeightMessage{}, "ostracon/wal/EndHeightMessage") 53 } 54 55 //-------------------------------------------------------- 56 // Simple write-ahead logger 57 58 // WAL is an interface for any write-ahead logger. 59 type WAL interface { 60 Write(WALMessage) error 61 WriteSync(WALMessage) error 62 FlushAndSync() error 63 64 SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) 65 66 // service methods 67 Start() error 68 Stop() error 69 Wait() 70 } 71 72 // Write ahead logger writes msgs to disk before they are processed. 73 // Can be used for crash-recovery and deterministic replay. 74 // TODO: currently the wal is overwritten during replay catchup, give it a mode 75 // so it's either reading or appending - must read to end to start appending 76 // again. 77 type BaseWAL struct { 78 service.BaseService 79 80 group *auto.Group 81 82 enc *WALEncoder 83 84 flushTicker *time.Ticker 85 flushInterval time.Duration 86 } 87 88 var _ WAL = &BaseWAL{} 89 90 // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements 91 // WAL. It's flushed and synced to disk every 2s and once when stopped. 92 func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) { 93 err := tmos.EnsureDir(filepath.Dir(walFile), 0700) 94 if err != nil { 95 return nil, fmt.Errorf("failed to ensure WAL directory is in place: %w", err) 96 } 97 98 group, err := auto.OpenGroup(walFile, groupOptions...) 99 if err != nil { 100 return nil, err 101 } 102 wal := &BaseWAL{ 103 group: group, 104 enc: NewWALEncoder(group), 105 flushInterval: walDefaultFlushInterval, 106 } 107 wal.BaseService = *service.NewBaseService(nil, "baseWAL", wal) 108 return wal, nil 109 } 110 111 // SetFlushInterval allows us to override the periodic flush interval for the WAL. 112 func (wal *BaseWAL) SetFlushInterval(i time.Duration) { 113 wal.flushInterval = i 114 } 115 116 func (wal *BaseWAL) Group() *auto.Group { 117 return wal.group 118 } 119 120 func (wal *BaseWAL) SetLogger(l log.Logger) { 121 wal.BaseService.Logger = l 122 wal.group.SetLogger(l) 123 } 124 125 func (wal *BaseWAL) OnStart() error { 126 size, err := wal.group.Head.Size() 127 if err != nil { 128 return err 129 } else if size == 0 { 130 if err := wal.WriteSync(EndHeightMessage{0}); err != nil { 131 return err 132 } 133 } 134 err = wal.group.Start() 135 if err != nil { 136 return err 137 } 138 wal.flushTicker = time.NewTicker(wal.flushInterval) 139 go wal.processFlushTicks() 140 return nil 141 } 142 143 func (wal *BaseWAL) processFlushTicks() { 144 for { 145 select { 146 case <-wal.flushTicker.C: 147 if err := wal.FlushAndSync(); err != nil { 148 wal.Logger.Error("Periodic WAL flush failed", "err", err) 149 } 150 case <-wal.Quit(): 151 return 152 } 153 } 154 } 155 156 // FlushAndSync flushes and fsync's the underlying group's data to disk. 157 // See auto#FlushAndSync 158 func (wal *BaseWAL) FlushAndSync() error { 159 return wal.group.FlushAndSync() 160 } 161 162 // Stop the underlying autofile group. 163 // Use Wait() to ensure it's finished shutting down 164 // before cleaning up files. 165 func (wal *BaseWAL) OnStop() { 166 wal.flushTicker.Stop() 167 if err := wal.FlushAndSync(); err != nil { 168 wal.Logger.Error("error on flush data to disk", "error", err) 169 } 170 if err := wal.group.Stop(); err != nil { 171 wal.Logger.Error("error trying to stop wal", "error", err) 172 } 173 wal.group.Close() 174 } 175 176 // Wait for the underlying autofile group to finish shutting down 177 // so it's safe to cleanup files. 178 func (wal *BaseWAL) Wait() { 179 wal.group.Wait() 180 } 181 182 // Write is called in newStep and for each receive on the 183 // peerMsgQueue and the timeoutTicker. 184 // NOTE: does not call fsync() 185 func (wal *BaseWAL) Write(msg WALMessage) error { 186 if wal == nil { 187 return nil 188 } 189 190 if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil { 191 wal.Logger.Error("Error writing msg to consensus wal. WARNING: recover may not be possible for the current height", 192 "err", err, "msg", msg) 193 return err 194 } 195 196 return nil 197 } 198 199 // WriteSync is called when we receive a msg from ourselves 200 // so that we write to disk before sending signed messages. 201 // NOTE: calls fsync() 202 func (wal *BaseWAL) WriteSync(msg WALMessage) error { 203 if wal == nil { 204 return nil 205 } 206 207 if err := wal.Write(msg); err != nil { 208 return err 209 } 210 211 if err := wal.FlushAndSync(); err != nil { 212 wal.Logger.Error(`WriteSync failed to flush consensus wal. 213 WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`, 214 "err", err) 215 return err 216 } 217 218 return nil 219 } 220 221 // WALSearchOptions are optional arguments to SearchForEndHeight. 222 type WALSearchOptions struct { 223 // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. 224 IgnoreDataCorruptionErrors bool 225 } 226 227 // SearchForEndHeight searches for the EndHeightMessage with the given height 228 // and returns an auto.GroupReader, whenever it was found or not and an error. 229 // Group reader will be nil if found equals false. 230 // 231 // CONTRACT: caller must close group reader. 232 func (wal *BaseWAL) SearchForEndHeight( 233 height int64, 234 options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 235 var ( 236 msg *TimedWALMessage 237 gr *auto.GroupReader 238 ) 239 lastHeightFound := int64(-1) 240 241 // NOTE: starting from the last file in the group because we're usually 242 // searching for the last height. See replay.go 243 min, max := wal.group.MinIndex(), wal.group.MaxIndex() 244 wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) 245 for index := max; index >= min; index-- { 246 gr, err = wal.group.NewReader(index) 247 if err != nil { 248 return nil, false, err 249 } 250 251 dec := NewWALDecoder(gr) 252 for { 253 msg, err = dec.Decode() 254 if err == io.EOF { 255 // OPTIMISATION: no need to look for height in older files if we've seen h < height 256 if lastHeightFound > 0 && lastHeightFound < height { 257 gr.Close() 258 return nil, false, nil 259 } 260 // check next file 261 break 262 } 263 if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { 264 wal.Logger.Error("Corrupted entry. Skipping...", "err", err) 265 // do nothing 266 continue 267 } else if err != nil { 268 gr.Close() 269 return nil, false, err 270 } 271 272 if m, ok := msg.Msg.(EndHeightMessage); ok { 273 lastHeightFound = m.Height 274 if m.Height == height { // found 275 wal.Logger.Info("Found", "height", height, "index", index) 276 return gr, true, nil 277 } 278 } 279 } 280 gr.Close() 281 } 282 283 return nil, false, nil 284 } 285 286 // A WALEncoder writes custom-encoded WAL messages to an output stream. 287 // 288 // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value 289 type WALEncoder struct { 290 wr io.Writer 291 } 292 293 // NewWALEncoder returns a new encoder that writes to wr. 294 func NewWALEncoder(wr io.Writer) *WALEncoder { 295 return &WALEncoder{wr} 296 } 297 298 // Encode writes the custom encoding of v to the stream. It returns an error if 299 // the encoded size of v is greater than 1MB. Any error encountered 300 // during the write is also returned. 301 func (enc *WALEncoder) Encode(v *TimedWALMessage) error { 302 pbMsg, err := WALToProto(v.Msg) 303 if err != nil { 304 return err 305 } 306 pv := tmcons.TimedWALMessage{ 307 Time: v.Time, 308 Msg: pbMsg, 309 } 310 311 data, err := proto.Marshal(&pv) 312 if err != nil { 313 panic(fmt.Errorf("encode timed wall message failure: %w", err)) 314 } 315 316 crc := crc32.Checksum(data, crc32c) 317 length := uint32(len(data)) 318 if length > maxMsgSizeBytes { 319 return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) 320 } 321 totalLength := 8 + int(length) 322 323 msg := make([]byte, totalLength) 324 binary.BigEndian.PutUint32(msg[0:4], crc) 325 binary.BigEndian.PutUint32(msg[4:8], length) 326 copy(msg[8:], data) 327 328 _, err = enc.wr.Write(msg) 329 return err 330 } 331 332 // IsDataCorruptionError returns true if data has been corrupted inside WAL. 333 func IsDataCorruptionError(err error) bool { 334 _, ok := err.(DataCorruptionError) 335 return ok 336 } 337 338 // DataCorruptionError is an error that occures if data on disk was corrupted. 339 type DataCorruptionError struct { 340 cause error 341 } 342 343 func (e DataCorruptionError) Error() string { 344 return fmt.Sprintf("DataCorruptionError[%v]", e.cause) 345 } 346 347 func (e DataCorruptionError) Cause() error { 348 return e.cause 349 } 350 351 // A WALDecoder reads and decodes custom-encoded WAL messages from an input 352 // stream. See WALEncoder for the format used. 353 // 354 // It will also compare the checksums and make sure data size is equal to the 355 // length from the header. If that is not the case, error will be returned. 356 type WALDecoder struct { 357 rd io.Reader 358 } 359 360 // NewWALDecoder returns a new decoder that reads from rd. 361 func NewWALDecoder(rd io.Reader) *WALDecoder { 362 return &WALDecoder{rd} 363 } 364 365 // Decode reads the next custom-encoded value from its reader and returns it. 366 func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { 367 b := make([]byte, 4) 368 369 _, err := dec.rd.Read(b) 370 if errors.Is(err, io.EOF) { 371 return nil, err 372 } 373 if err != nil { 374 return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} 375 } 376 crc := binary.BigEndian.Uint32(b) 377 378 b = make([]byte, 4) 379 _, err = dec.rd.Read(b) 380 if err != nil { 381 return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} 382 } 383 length := binary.BigEndian.Uint32(b) 384 385 if length > maxMsgSizeBytes { 386 return nil, DataCorruptionError{fmt.Errorf( 387 "length %d exceeded maximum possible value of %d bytes", 388 length, 389 maxMsgSizeBytes)} 390 } 391 392 data := make([]byte, length) 393 n, err := dec.rd.Read(data) 394 if err != nil { 395 return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} 396 } 397 398 // check checksum before decoding data 399 actualCRC := crc32.Checksum(data, crc32c) 400 if actualCRC != crc { 401 return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} 402 } 403 404 var res = new(tmcons.TimedWALMessage) 405 err = proto.Unmarshal(data, res) 406 if err != nil { 407 return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} 408 } 409 410 walMsg, err := WALFromProto(res.Msg) 411 if err != nil { 412 return nil, DataCorruptionError{fmt.Errorf("failed to convert from proto: %w", err)} 413 } 414 tMsgWal := &TimedWALMessage{ 415 Time: res.Time, 416 Msg: walMsg, 417 } 418 419 return tMsgWal, err 420 } 421 422 type nilWAL struct{} 423 424 var _ WAL = nilWAL{} 425 426 func (nilWAL) Write(m WALMessage) error { return nil } 427 func (nilWAL) WriteSync(m WALMessage) error { return nil } 428 func (nilWAL) FlushAndSync() error { return nil } 429 func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) { 430 return nil, false, nil 431 } 432 func (nilWAL) Start() error { return nil } 433 func (nilWAL) Stop() error { return nil } 434 func (nilWAL) Wait() {}