github.com/evdatsion/aphelion-dpos-bft@v0.32.1/consensus/wal.go (about)

     1  package consensus
     2  
     3  import (
     4  	"encoding/binary"
     5  	"fmt"
     6  	"hash/crc32"
     7  	"io"
     8  	"path/filepath"
     9  	"time"
    10  
    11  	"github.com/pkg/errors"
    12  
    13  	amino "github.com/evdatsion/go-amino"
    14  	auto "github.com/evdatsion/aphelion-dpos-bft/libs/autofile"
    15  	cmn "github.com/evdatsion/aphelion-dpos-bft/libs/common"
    16  	"github.com/evdatsion/aphelion-dpos-bft/libs/log"
    17  	"github.com/evdatsion/aphelion-dpos-bft/types"
    18  	tmtime "github.com/evdatsion/aphelion-dpos-bft/types/time"
    19  )
    20  
    21  const (
    22  	// must be greater than types.BlockPartSizeBytes + a few bytes
    23  	maxMsgSizeBytes = 1024 * 1024 // 1MB
    24  
    25  	// how often the WAL should be sync'd during period sync'ing
    26  	walDefaultFlushInterval = 2 * time.Second
    27  )
    28  
    29  //--------------------------------------------------------
    30  // types and functions for saving consensus messages
    31  
    32  type TimedWALMessage struct {
    33  	Time time.Time  `json:"time"` // for debugging purposes
    34  	Msg  WALMessage `json:"msg"`
    35  }
    36  
    37  // EndHeightMessage marks the end of the given height inside WAL.
    38  // @internal used by scripts/wal2json util.
    39  type EndHeightMessage struct {
    40  	Height int64 `json:"height"`
    41  }
    42  
    43  type WALMessage interface{}
    44  
    45  func RegisterWALMessages(cdc *amino.Codec) {
    46  	cdc.RegisterInterface((*WALMessage)(nil), nil)
    47  	cdc.RegisterConcrete(types.EventDataRoundState{}, "tendermint/wal/EventDataRoundState", nil)
    48  	cdc.RegisterConcrete(msgInfo{}, "tendermint/wal/MsgInfo", nil)
    49  	cdc.RegisterConcrete(timeoutInfo{}, "tendermint/wal/TimeoutInfo", nil)
    50  	cdc.RegisterConcrete(EndHeightMessage{}, "tendermint/wal/EndHeightMessage", nil)
    51  }
    52  
    53  //--------------------------------------------------------
    54  // Simple write-ahead logger
    55  
// WAL is an interface for any write-ahead logger.
type WAL interface {
	// Write records a message in the log. The baseWAL implementation does
	// not fsync.
	Write(WALMessage)
	// WriteSync records a message in the log. The baseWAL implementation
	// flushes and fsyncs before returning.
	WriteSync(WALMessage)
	// FlushAndSync flushes buffered data and fsyncs it to disk.
	FlushAndSync() error

	// SearchForEndHeight looks for the EndHeightMessage with the given
	// height. In the baseWAL implementation rd is nil when found is false,
	// and the caller must close rd when it is returned.
	SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error)

	// service methods
	Start() error
	Stop() error
	Wait()
}
    69  
// Write ahead logger writes msgs to disk before they are processed.
// Can be used for crash-recovery and deterministic replay.
// TODO: currently the wal is overwritten during replay catchup, give it a mode
// so it's either reading or appending - must read to end to start appending
// again.
type baseWAL struct {
	cmn.BaseService

	// group is the autofile group that stores the WAL data.
	group *auto.Group

	// enc frames messages and writes them to group (see NewWAL).
	enc *WALEncoder

	// flushTicker drives the periodic flush; it is created in OnStart.
	flushTicker *time.Ticker
	// flushInterval is the period used for flushTicker; NewWAL sets it to
	// walDefaultFlushInterval.
	flushInterval time.Duration
}

// Compile-time check that *baseWAL implements WAL.
var _ WAL = &baseWAL{}
    87  
    88  // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements
    89  // WAL. It's flushed and synced to disk every 2s and once when stopped.
    90  func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*baseWAL, error) {
    91  	err := cmn.EnsureDir(filepath.Dir(walFile), 0700)
    92  	if err != nil {
    93  		return nil, errors.Wrap(err, "failed to ensure WAL directory is in place")
    94  	}
    95  
    96  	group, err := auto.OpenGroup(walFile, groupOptions...)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	wal := &baseWAL{
   101  		group:         group,
   102  		enc:           NewWALEncoder(group),
   103  		flushInterval: walDefaultFlushInterval,
   104  	}
   105  	wal.BaseService = *cmn.NewBaseService(nil, "baseWAL", wal)
   106  	return wal, nil
   107  }
   108  
// SetFlushInterval allows us to override the periodic flush interval for the WAL.
// The value is read when OnStart creates the flush ticker.
func (wal *baseWAL) SetFlushInterval(i time.Duration) {
	wal.flushInterval = i
}
   113  
// Group returns the underlying autofile group the WAL writes to.
func (wal *baseWAL) Group() *auto.Group {
	return wal.group
}
   117  
// SetLogger sets the logger on both the WAL service and its underlying
// autofile group.
func (wal *baseWAL) SetLogger(l log.Logger) {
	wal.BaseService.Logger = l
	wal.group.SetLogger(l)
}
   122  
   123  func (wal *baseWAL) OnStart() error {
   124  	size, err := wal.group.Head.Size()
   125  	if err != nil {
   126  		return err
   127  	} else if size == 0 {
   128  		wal.WriteSync(EndHeightMessage{0})
   129  	}
   130  	err = wal.group.Start()
   131  	if err != nil {
   132  		return err
   133  	}
   134  	wal.flushTicker = time.NewTicker(wal.flushInterval)
   135  	go wal.processFlushTicks()
   136  	return nil
   137  }
   138  
   139  func (wal *baseWAL) processFlushTicks() {
   140  	for {
   141  		select {
   142  		case <-wal.flushTicker.C:
   143  			if err := wal.FlushAndSync(); err != nil {
   144  				wal.Logger.Error("Periodic WAL flush failed", "err", err)
   145  			}
   146  		case <-wal.Quit():
   147  			return
   148  		}
   149  	}
   150  }
   151  
// FlushAndSync flushes and fsync's the underlying group's data to disk.
// It returns whatever error the group reports.
// See auto#FlushAndSync
func (wal *baseWAL) FlushAndSync() error {
	return wal.group.FlushAndSync()
}
   157  
   158  // Stop the underlying autofile group.
   159  // Use Wait() to ensure it's finished shutting down
   160  // before cleaning up files.
   161  func (wal *baseWAL) OnStop() {
   162  	wal.flushTicker.Stop()
   163  	wal.FlushAndSync()
   164  	wal.group.Stop()
   165  	wal.group.Close()
   166  }
   167  
// Wait for the underlying autofile group to finish shutting down
// so it's safe to cleanup files. It delegates to the group's Wait.
func (wal *baseWAL) Wait() {
	wal.group.Wait()
}
   173  
   174  // Write is called in newStep and for each receive on the
   175  // peerMsgQueue and the timeoutTicker.
   176  // NOTE: does not call fsync()
   177  func (wal *baseWAL) Write(msg WALMessage) {
   178  	if wal == nil {
   179  		return
   180  	}
   181  
   182  	// Write the wal message
   183  	if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil {
   184  		panic(fmt.Sprintf("Error writing msg to consensus wal: %v \n\nMessage: %v", err, msg))
   185  	}
   186  }
   187  
   188  // WriteSync is called when we receive a msg from ourselves
   189  // so that we write to disk before sending signed messages.
   190  // NOTE: calls fsync()
   191  func (wal *baseWAL) WriteSync(msg WALMessage) {
   192  	if wal == nil {
   193  		return
   194  	}
   195  
   196  	wal.Write(msg)
   197  	if err := wal.FlushAndSync(); err != nil {
   198  		panic(fmt.Sprintf("Error flushing consensus wal buf to file. Error: %v \n", err))
   199  	}
   200  }
   201  
// WALSearchOptions are optional arguments to SearchForEndHeight.
type WALSearchOptions struct {
	// IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors.
	// Skipped entries are logged by baseWAL.SearchForEndHeight.
	IgnoreDataCorruptionErrors bool
}
   207  
   208  // SearchForEndHeight searches for the EndHeightMessage with the given height
   209  // and returns an auto.GroupReader, whenever it was found or not and an error.
   210  // Group reader will be nil if found equals false.
   211  //
   212  // CONTRACT: caller must close group reader.
   213  func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {
   214  	var (
   215  		msg *TimedWALMessage
   216  		gr  *auto.GroupReader
   217  	)
   218  	lastHeightFound := int64(-1)
   219  
   220  	// NOTE: starting from the last file in the group because we're usually
   221  	// searching for the last height. See replay.go
   222  	min, max := wal.group.MinIndex(), wal.group.MaxIndex()
   223  	wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max)
   224  	for index := max; index >= min; index-- {
   225  		gr, err = wal.group.NewReader(index)
   226  		if err != nil {
   227  			return nil, false, err
   228  		}
   229  
   230  		dec := NewWALDecoder(gr)
   231  		for {
   232  			msg, err = dec.Decode()
   233  			if err == io.EOF {
   234  				// OPTIMISATION: no need to look for height in older files if we've seen h < height
   235  				if lastHeightFound > 0 && lastHeightFound < height {
   236  					gr.Close()
   237  					return nil, false, nil
   238  				}
   239  				// check next file
   240  				break
   241  			}
   242  			if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) {
   243  				wal.Logger.Error("Corrupted entry. Skipping...", "err", err)
   244  				// do nothing
   245  				continue
   246  			} else if err != nil {
   247  				gr.Close()
   248  				return nil, false, err
   249  			}
   250  
   251  			if m, ok := msg.Msg.(EndHeightMessage); ok {
   252  				lastHeightFound = m.Height
   253  				if m.Height == height { // found
   254  					wal.Logger.Info("Found", "height", height, "index", index)
   255  					return gr, true, nil
   256  				}
   257  			}
   258  		}
   259  		gr.Close()
   260  	}
   261  
   262  	return nil, false, nil
   263  }
   264  
   265  ///////////////////////////////////////////////////////////////////////////////
   266  
   267  // A WALEncoder writes custom-encoded WAL messages to an output stream.
   268  //
   269  // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value (go-amino encoded)
   270  type WALEncoder struct {
   271  	wr io.Writer
   272  }
   273  
   274  // NewWALEncoder returns a new encoder that writes to wr.
   275  func NewWALEncoder(wr io.Writer) *WALEncoder {
   276  	return &WALEncoder{wr}
   277  }
   278  
   279  // Encode writes the custom encoding of v to the stream. It returns an error if
   280  // the amino-encoded size of v is greater than 1MB. Any error encountered
   281  // during the write is also returned.
   282  func (enc *WALEncoder) Encode(v *TimedWALMessage) error {
   283  	data := cdc.MustMarshalBinaryBare(v)
   284  
   285  	crc := crc32.Checksum(data, crc32c)
   286  	length := uint32(len(data))
   287  	if length > maxMsgSizeBytes {
   288  		return fmt.Errorf("Msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes)
   289  	}
   290  	totalLength := 8 + int(length)
   291  
   292  	msg := make([]byte, totalLength)
   293  	binary.BigEndian.PutUint32(msg[0:4], crc)
   294  	binary.BigEndian.PutUint32(msg[4:8], length)
   295  	copy(msg[8:], data)
   296  
   297  	_, err := enc.wr.Write(msg)
   298  
   299  	return err
   300  }
   301  
   302  ///////////////////////////////////////////////////////////////////////////////
   303  
   304  // IsDataCorruptionError returns true if data has been corrupted inside WAL.
   305  func IsDataCorruptionError(err error) bool {
   306  	_, ok := err.(DataCorruptionError)
   307  	return ok
   308  }
   309  
   310  // DataCorruptionError is an error that occures if data on disk was corrupted.
   311  type DataCorruptionError struct {
   312  	cause error
   313  }
   314  
   315  func (e DataCorruptionError) Error() string {
   316  	return fmt.Sprintf("DataCorruptionError[%v]", e.cause)
   317  }
   318  
   319  func (e DataCorruptionError) Cause() error {
   320  	return e.cause
   321  }
   322  
   323  // A WALDecoder reads and decodes custom-encoded WAL messages from an input
   324  // stream. See WALEncoder for the format used.
   325  //
   326  // It will also compare the checksums and make sure data size is equal to the
   327  // length from the header. If that is not the case, error will be returned.
   328  type WALDecoder struct {
   329  	rd io.Reader
   330  }
   331  
   332  // NewWALDecoder returns a new decoder that reads from rd.
   333  func NewWALDecoder(rd io.Reader) *WALDecoder {
   334  	return &WALDecoder{rd}
   335  }
   336  
   337  // Decode reads the next custom-encoded value from its reader and returns it.
   338  func (dec *WALDecoder) Decode() (*TimedWALMessage, error) {
   339  	b := make([]byte, 4)
   340  
   341  	_, err := dec.rd.Read(b)
   342  	if err == io.EOF {
   343  		return nil, err
   344  	}
   345  	if err != nil {
   346  		return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)}
   347  	}
   348  	crc := binary.BigEndian.Uint32(b)
   349  
   350  	b = make([]byte, 4)
   351  	_, err = dec.rd.Read(b)
   352  	if err != nil {
   353  		return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)}
   354  	}
   355  	length := binary.BigEndian.Uint32(b)
   356  
   357  	if length > maxMsgSizeBytes {
   358  		return nil, DataCorruptionError{fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes)}
   359  	}
   360  
   361  	data := make([]byte, length)
   362  	n, err := dec.rd.Read(data)
   363  	if err != nil {
   364  		return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)}
   365  	}
   366  
   367  	// check checksum before decoding data
   368  	actualCRC := crc32.Checksum(data, crc32c)
   369  	if actualCRC != crc {
   370  		return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)}
   371  	}
   372  
   373  	var res = new(TimedWALMessage) // nolint: gosimple
   374  	err = cdc.UnmarshalBinaryBare(data, res)
   375  	if err != nil {
   376  		return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)}
   377  	}
   378  
   379  	return res, err
   380  }
   381  
// nilWAL is a WAL implementation whose methods all do nothing: writes are
// discarded, searches never find anything, and the service methods succeed
// immediately.
type nilWAL struct{}

// Compile-time check that nilWAL implements WAL.
var _ WAL = nilWAL{}

// No-op write and flush methods.
func (nilWAL) Write(m WALMessage)     {}
func (nilWAL) WriteSync(m WALMessage) {}
func (nilWAL) FlushAndSync() error    { return nil }

// SearchForEndHeight always reports "not found" with a nil reader and no error.
func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {
	return nil, false, nil
}

// No-op service methods.
func (nilWAL) Start() error { return nil }
func (nilWAL) Stop() error  { return nil }
func (nilWAL) Wait()        {}