github.com/516108736/tendermint@v0.36.0/consensus/wal.go (about)

     1  package consensus
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"fmt"
     7  	"hash/crc32"
     8  	"io"
     9  	"path/filepath"
    10  	"time"
    11  
    12  	"github.com/gogo/protobuf/proto"
    13  
    14  	auto "github.com/tendermint/tendermint/libs/autofile"
    15  	tmjson "github.com/tendermint/tendermint/libs/json"
    16  	"github.com/tendermint/tendermint/libs/log"
    17  	tmos "github.com/tendermint/tendermint/libs/os"
    18  	"github.com/tendermint/tendermint/libs/service"
    19  	tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus"
    20  	tmtime "github.com/tendermint/tendermint/types/time"
    21  )
    22  
const (
	// maxMsgSizeBytes is the largest record payload the WAL encoder will
	// write and the decoder will accept: the max consensus msg size
	// (maxMsgSize, declared elsewhere in this package) plus 24 bytes of
	// allowance for the time.Time stored with every message.
	maxMsgSizeBytes = maxMsgSize + 24

	// walDefaultFlushInterval is how often the WAL is flushed and sync'd by
	// the periodic flush loop unless overridden with SetFlushInterval.
	walDefaultFlushInterval = 2 * time.Second
)
    30  
    31  //--------------------------------------------------------
// types and functions for saving consensus messages
    33  
// TimedWALMessage wraps WALMessage and adds Time for debugging purposes.
//
// Time is the timestamp attached to the record (BaseWAL.Write fills it with
// tmtime.Now()), and Msg is the wrapped payload. This is the unit that
// WALEncoder.Encode writes and WALDecoder.Decode returns.
type TimedWALMessage struct {
	Time time.Time  `json:"time"`
	Msg  WALMessage `json:"msg"`
}
    39  
// EndHeightMessage marks the end of the given height inside WAL.
// @internal used by scripts/wal2json util.
//
// BaseWAL.OnStart writes EndHeightMessage{0} into an empty WAL, and
// SearchForEndHeight locates a height by scanning for the record whose
// Height field matches.
type EndHeightMessage struct {
	Height int64 `json:"height"`
}
    45  
// WALMessage is the payload carried by a WAL record. It is an empty
// interface, so the compiler accepts any value; WALEncoder.Encode hands the
// value to WALToProto, which decides whether it can actually be stored.
type WALMessage interface{}
    47  
    48  func init() {
    49  	tmjson.RegisterType(msgInfo{}, "tendermint/wal/MsgInfo")
    50  	tmjson.RegisterType(timeoutInfo{}, "tendermint/wal/TimeoutInfo")
    51  	tmjson.RegisterType(EndHeightMessage{}, "tendermint/wal/EndHeightMessage")
    52  }
    53  
    54  //--------------------------------------------------------
    55  // Simple write-ahead logger
    56  
// WAL is an interface for any write-ahead logger.
//
// It is implemented in this file by BaseWAL (backed by an autofile group)
// and by nilWAL (every method is a no-op).
type WAL interface {
	// Write records a message. BaseWAL's implementation does not fsync.
	Write(WALMessage) error
	// WriteSync records a message and then flushes; BaseWAL's implementation
	// calls FlushAndSync after the write.
	WriteSync(WALMessage) error
	// FlushAndSync flushes buffered records and syncs them to disk.
	FlushAndSync() error

	// SearchForEndHeight looks for the EndHeightMessage with the given height.
	// When found is true, rd is a reader the caller must close.
	SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error)

	// service methods
	Start() error
	Stop() error
	Wait()
}
    70  
// BaseWAL is a write-ahead logger that writes msgs to disk before they are
// processed. Can be used for crash-recovery and deterministic replay.
// TODO: currently the wal is overwritten during replay catchup, give it a mode
// so it's either reading or appending - must read to end to start appending
// again.
type BaseWAL struct {
	service.BaseService

	// group is the autofile group that records are written to and searched in.
	group *auto.Group

	// enc encodes records into group; NewWAL builds it around the same group.
	enc *WALEncoder

	// flushTicker drives the periodic flush loop. It is created in OnStart
	// with period flushInterval and stopped in OnStop.
	flushTicker   *time.Ticker
	flushInterval time.Duration
}

// Compile-time check that *BaseWAL satisfies WAL.
var _ WAL = &BaseWAL{}
    88  
    89  // NewWAL returns a new write-ahead logger based on `baseWAL`, which implements
    90  // WAL. It's flushed and synced to disk every 2s and once when stopped.
    91  func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) {
    92  	err := tmos.EnsureDir(filepath.Dir(walFile), 0700)
    93  	if err != nil {
    94  		return nil, fmt.Errorf("failed to ensure WAL directory is in place: %w", err)
    95  	}
    96  
    97  	group, err := auto.OpenGroup(walFile, groupOptions...)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  	wal := &BaseWAL{
   102  		group:         group,
   103  		enc:           NewWALEncoder(group),
   104  		flushInterval: walDefaultFlushInterval,
   105  	}
   106  	wal.BaseService = *service.NewBaseService(nil, "baseWAL", wal)
   107  	return wal, nil
   108  }
   109  
// SetFlushInterval allows us to override the periodic flush interval for the WAL.
//
// OnStart reads the interval once, when it creates the flush ticker, so a
// value set after the WAL has started does not change the running ticker.
func (wal *BaseWAL) SetFlushInterval(i time.Duration) {
	wal.flushInterval = i
}
   114  
// Group returns the autofile group this WAL writes to.
func (wal *BaseWAL) Group() *auto.Group {
	return wal.group
}
   118  
   119  func (wal *BaseWAL) SetLogger(l log.Logger) {
   120  	wal.BaseService.Logger = l
   121  	wal.group.SetLogger(l)
   122  }
   123  
   124  func (wal *BaseWAL) OnStart() error {
   125  	size, err := wal.group.Head.Size()
   126  	if err != nil {
   127  		return err
   128  	} else if size == 0 {
   129  		if err := wal.WriteSync(EndHeightMessage{0}); err != nil {
   130  			return err
   131  		}
   132  	}
   133  	err = wal.group.Start()
   134  	if err != nil {
   135  		return err
   136  	}
   137  	wal.flushTicker = time.NewTicker(wal.flushInterval)
   138  	go wal.processFlushTicks()
   139  	return nil
   140  }
   141  
   142  func (wal *BaseWAL) processFlushTicks() {
   143  	for {
   144  		select {
   145  		case <-wal.flushTicker.C:
   146  			if err := wal.FlushAndSync(); err != nil {
   147  				wal.Logger.Error("Periodic WAL flush failed", "err", err)
   148  			}
   149  		case <-wal.Quit():
   150  			return
   151  		}
   152  	}
   153  }
   154  
// FlushAndSync flushes and fsync's the underlying group's data to disk.
// See auto#FlushAndSync
//
// Within this file it is called by the periodic flush loop, by WriteSync
// after each write, and once more by OnStop.
func (wal *BaseWAL) FlushAndSync() error {
	return wal.group.FlushAndSync()
}
   160  
   161  // Stop the underlying autofile group.
   162  // Use Wait() to ensure it's finished shutting down
   163  // before cleaning up files.
   164  func (wal *BaseWAL) OnStop() {
   165  	wal.flushTicker.Stop()
   166  	if err := wal.FlushAndSync(); err != nil {
   167  		wal.Logger.Error("error on flush data to disk", "error", err)
   168  	}
   169  	if err := wal.group.Stop(); err != nil {
   170  		wal.Logger.Error("error trying to stop wal", "error", err)
   171  	}
   172  	wal.group.Close()
   173  }
   174  
// Wait blocks until the underlying autofile group has finished shutting down,
// so it's safe to cleanup files afterwards. It delegates to the group's Wait.
func (wal *BaseWAL) Wait() {
	wal.group.Wait()
}
   180  
   181  // Write is called in newStep and for each receive on the
   182  // peerMsgQueue and the timeoutTicker.
   183  // NOTE: does not call fsync()
   184  func (wal *BaseWAL) Write(msg WALMessage) error {
   185  	if wal == nil {
   186  		return nil
   187  	}
   188  
   189  	if err := wal.enc.Encode(&TimedWALMessage{tmtime.Now(), msg}); err != nil {
   190  		wal.Logger.Error("Error writing msg to consensus wal. WARNING: recover may not be possible for the current height",
   191  			"err", err, "msg", msg)
   192  		return err
   193  	}
   194  
   195  	return nil
   196  }
   197  
   198  // WriteSync is called when we receive a msg from ourselves
   199  // so that we write to disk before sending signed messages.
   200  // NOTE: calls fsync()
   201  func (wal *BaseWAL) WriteSync(msg WALMessage) error {
   202  	if wal == nil {
   203  		return nil
   204  	}
   205  
   206  	if err := wal.Write(msg); err != nil {
   207  		return err
   208  	}
   209  
   210  	if err := wal.FlushAndSync(); err != nil {
   211  		wal.Logger.Error(`WriteSync failed to flush consensus wal.
   212  		WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`,
   213  			"err", err)
   214  		return err
   215  	}
   216  
   217  	return nil
   218  }
   219  
// WALSearchOptions are optional arguments to SearchForEndHeight.
type WALSearchOptions struct {
	// IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors.
	// When false, the first corrupted record aborts the search with that error.
	IgnoreDataCorruptionErrors bool
}
   225  
   226  // SearchForEndHeight searches for the EndHeightMessage with the given height
   227  // and returns an auto.GroupReader, whenever it was found or not and an error.
   228  // Group reader will be nil if found equals false.
   229  //
   230  // CONTRACT: caller must close group reader.
   231  func (wal *BaseWAL) SearchForEndHeight(
   232  	height int64,
   233  	options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {
   234  	var (
   235  		msg *TimedWALMessage
   236  		gr  *auto.GroupReader
   237  	)
   238  	lastHeightFound := int64(-1)
   239  
   240  	// NOTE: starting from the last file in the group because we're usually
   241  	// searching for the last height. See replay.go
   242  	min, max := wal.group.MinIndex(), wal.group.MaxIndex()
   243  	wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max)
   244  	for index := max; index >= min; index-- {
   245  		gr, err = wal.group.NewReader(index)
   246  		if err != nil {
   247  			return nil, false, err
   248  		}
   249  
   250  		dec := NewWALDecoder(gr)
   251  		for {
   252  			msg, err = dec.Decode()
   253  			if err == io.EOF {
   254  				// OPTIMISATION: no need to look for height in older files if we've seen h < height
   255  				if lastHeightFound > 0 && lastHeightFound < height {
   256  					gr.Close()
   257  					return nil, false, nil
   258  				}
   259  				// check next file
   260  				break
   261  			}
   262  			if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) {
   263  				wal.Logger.Error("Corrupted entry. Skipping...", "err", err)
   264  				// do nothing
   265  				continue
   266  			} else if err != nil {
   267  				gr.Close()
   268  				return nil, false, err
   269  			}
   270  
   271  			if m, ok := msg.Msg.(EndHeightMessage); ok {
   272  				lastHeightFound = m.Height
   273  				if m.Height == height { // found
   274  					wal.Logger.Info("Found", "height", height, "index", index)
   275  					return gr, true, nil
   276  				}
   277  			}
   278  		}
   279  		gr.Close()
   280  	}
   281  
   282  	return nil, false, nil
   283  }
   284  
   285  // A WALEncoder writes custom-encoded WAL messages to an output stream.
   286  //
   287  // Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value
   288  type WALEncoder struct {
   289  	wr io.Writer
   290  }
   291  
   292  // NewWALEncoder returns a new encoder that writes to wr.
   293  func NewWALEncoder(wr io.Writer) *WALEncoder {
   294  	return &WALEncoder{wr}
   295  }
   296  
   297  // Encode writes the custom encoding of v to the stream. It returns an error if
   298  // the encoded size of v is greater than 1MB. Any error encountered
   299  // during the write is also returned.
   300  func (enc *WALEncoder) Encode(v *TimedWALMessage) error {
   301  	pbMsg, err := WALToProto(v.Msg)
   302  	if err != nil {
   303  		return err
   304  	}
   305  	pv := tmcons.TimedWALMessage{
   306  		Time: v.Time,
   307  		Msg:  pbMsg,
   308  	}
   309  
   310  	data, err := proto.Marshal(&pv)
   311  	if err != nil {
   312  		panic(fmt.Errorf("encode timed wall message failure: %w", err))
   313  	}
   314  
   315  	crc := crc32.Checksum(data, crc32c)
   316  	length := uint32(len(data))
   317  	if length > maxMsgSizeBytes {
   318  		return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes)
   319  	}
   320  	totalLength := 8 + int(length)
   321  
   322  	msg := make([]byte, totalLength)
   323  	binary.BigEndian.PutUint32(msg[0:4], crc)
   324  	binary.BigEndian.PutUint32(msg[4:8], length)
   325  	copy(msg[8:], data)
   326  
   327  	_, err = enc.wr.Write(msg)
   328  	return err
   329  }
   330  
   331  // IsDataCorruptionError returns true if data has been corrupted inside WAL.
   332  func IsDataCorruptionError(err error) bool {
   333  	_, ok := err.(DataCorruptionError)
   334  	return ok
   335  }
   336  
   337  // DataCorruptionError is an error that occures if data on disk was corrupted.
   338  type DataCorruptionError struct {
   339  	cause error
   340  }
   341  
   342  func (e DataCorruptionError) Error() string {
   343  	return fmt.Sprintf("DataCorruptionError[%v]", e.cause)
   344  }
   345  
   346  func (e DataCorruptionError) Cause() error {
   347  	return e.cause
   348  }
   349  
   350  // A WALDecoder reads and decodes custom-encoded WAL messages from an input
   351  // stream. See WALEncoder for the format used.
   352  //
   353  // It will also compare the checksums and make sure data size is equal to the
   354  // length from the header. If that is not the case, error will be returned.
   355  type WALDecoder struct {
   356  	rd io.Reader
   357  }
   358  
   359  // NewWALDecoder returns a new decoder that reads from rd.
   360  func NewWALDecoder(rd io.Reader) *WALDecoder {
   361  	return &WALDecoder{rd}
   362  }
   363  
   364  // Decode reads the next custom-encoded value from its reader and returns it.
   365  func (dec *WALDecoder) Decode() (*TimedWALMessage, error) {
   366  	b := make([]byte, 4)
   367  
   368  	_, err := dec.rd.Read(b)
   369  	if errors.Is(err, io.EOF) {
   370  		return nil, err
   371  	}
   372  	if err != nil {
   373  		return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)}
   374  	}
   375  	crc := binary.BigEndian.Uint32(b)
   376  
   377  	b = make([]byte, 4)
   378  	_, err = dec.rd.Read(b)
   379  	if err != nil {
   380  		return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)}
   381  	}
   382  	length := binary.BigEndian.Uint32(b)
   383  
   384  	if length > maxMsgSizeBytes {
   385  		return nil, DataCorruptionError{fmt.Errorf(
   386  			"length %d exceeded maximum possible value of %d bytes",
   387  			length,
   388  			maxMsgSizeBytes)}
   389  	}
   390  
   391  	data := make([]byte, length)
   392  	n, err := dec.rd.Read(data)
   393  	if err != nil {
   394  		return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)}
   395  	}
   396  
   397  	// check checksum before decoding data
   398  	actualCRC := crc32.Checksum(data, crc32c)
   399  	if actualCRC != crc {
   400  		return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)}
   401  	}
   402  
   403  	var res = new(tmcons.TimedWALMessage)
   404  	err = proto.Unmarshal(data, res)
   405  	if err != nil {
   406  		return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)}
   407  	}
   408  
   409  	walMsg, err := WALFromProto(res.Msg)
   410  	if err != nil {
   411  		return nil, DataCorruptionError{fmt.Errorf("failed to convert from proto: %w", err)}
   412  	}
   413  	tMsgWal := &TimedWALMessage{
   414  		Time: res.Time,
   415  		Msg:  walMsg,
   416  	}
   417  
   418  	return tMsgWal, err
   419  }
   420  
   421  type nilWAL struct{}
   422  
   423  var _ WAL = nilWAL{}
   424  
   425  func (nilWAL) Write(m WALMessage) error     { return nil }
   426  func (nilWAL) WriteSync(m WALMessage) error { return nil }
   427  func (nilWAL) FlushAndSync() error          { return nil }
   428  func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {
   429  	return nil, false, nil
   430  }
   431  func (nilWAL) Start() error { return nil }
   432  func (nilWAL) Stop() error  { return nil }
   433  func (nilWAL) Wait()        {}