github.com/true-sqn/fabric@v2.1.1+incompatible/orderer/consensus/etcdraft/storage.go (about)

     1  /*
     2  Copyright IBM Corp. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package etcdraft
     8  
     9  import (
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"path/filepath"
    14  	"sort"
    15  	"strings"
    16  
    17  	"github.com/hyperledger/fabric/common/flogging"
    18  	"github.com/pkg/errors"
    19  	"go.etcd.io/etcd/etcdserver/api/snap"
    20  	"go.etcd.io/etcd/pkg/fileutil"
    21  	"go.etcd.io/etcd/raft"
    22  	"go.etcd.io/etcd/raft/raftpb"
    23  	"go.etcd.io/etcd/wal"
    24  	"go.etcd.io/etcd/wal/walpb"
    25  )
    26  
// MaxSnapshotFiles defines the maximum number of etcd/raft snapshot files to
// retain on the filesystem. Snapshot files are read from newest to oldest,
// until the first intact file is found. The more snapshot files we keep
// around, the more we mitigate the impact of corrupted snapshots. This is
// exported for testing purposes. This MUST be greater than or equal to 1.
var MaxSnapshotFiles = 4
    33  
// MemoryStorage is currently backed by etcd/raft.MemoryStorage. This interface is
// defined to expose dependencies of fsm so that it may be swapped in the
// future. TODO(jay) Add other necessary methods to this interface once we need
// them in implementation.
type MemoryStorage interface {
	raft.Storage
	// Append adds entries to the in-memory storage.
	Append(entries []raftpb.Entry) error
	// SetHardState records the given HardState in memory.
	SetHardState(st raftpb.HardState) error
	// CreateSnapshot builds a snapshot at index i from the in-memory state,
	// carrying the given ConfState and application data.
	CreateSnapshot(i uint64, cs *raftpb.ConfState, data []byte) (raftpb.Snapshot, error)
	// Compact purges in-memory entries prior to compactIndex.
	Compact(compactIndex uint64) error
	// ApplySnapshot applies the given snapshot to the in-memory storage.
	ApplySnapshot(snap raftpb.Snapshot) error
}
    46  
// RaftStorage encapsulates the storages needed for etcd/raft data: the
// in-memory storage, the write-ahead log (WAL) and the snapshot files on disk.
type RaftStorage struct {
	// SnapshotCatchUpEntries is the number of entries kept in memory behind a
	// newly taken snapshot so that slow followers can still catch up.
	SnapshotCatchUpEntries uint64

	// directories holding the WAL segments and the snapshot files
	walDir  string
	snapDir string

	lg *flogging.FabricLogger

	ram  MemoryStorage     // in-memory raft storage
	wal  *wal.WAL          // write-ahead log on disk
	snap *snap.Snapshotter // reads and writes snapshot files in snapDir

	// a queue that keeps track of indices of snapshots on disk
	snapshotIndex []uint64
}
    63  
    64  // CreateStorage attempts to create a storage to persist etcd/raft data.
    65  // If data presents in specified disk, they are loaded to reconstruct storage state.
    66  func CreateStorage(
    67  	lg *flogging.FabricLogger,
    68  	walDir string,
    69  	snapDir string,
    70  	ram MemoryStorage,
    71  ) (*RaftStorage, error) {
    72  
    73  	sn, err := createSnapshotter(lg, snapDir)
    74  	if err != nil {
    75  		return nil, err
    76  	}
    77  
    78  	snapshot, err := sn.Load()
    79  	if err != nil {
    80  		if err == snap.ErrNoSnapshot {
    81  			lg.Debugf("No snapshot found at %s", snapDir)
    82  		} else {
    83  			return nil, errors.Errorf("failed to load snapshot: %s", err)
    84  		}
    85  	} else {
    86  		// snapshot found
    87  		lg.Debugf("Loaded snapshot at Term %d and Index %d, Nodes: %+v",
    88  			snapshot.Metadata.Term, snapshot.Metadata.Index, snapshot.Metadata.ConfState.Nodes)
    89  	}
    90  
    91  	w, st, ents, err := createOrReadWAL(lg, walDir, snapshot)
    92  	if err != nil {
    93  		return nil, errors.Errorf("failed to create or read WAL: %s", err)
    94  	}
    95  
    96  	if snapshot != nil {
    97  		lg.Debugf("Applying snapshot to raft MemoryStorage")
    98  		if err := ram.ApplySnapshot(*snapshot); err != nil {
    99  			return nil, errors.Errorf("Failed to apply snapshot to memory: %s", err)
   100  		}
   101  	}
   102  
   103  	lg.Debugf("Setting HardState to {Term: %d, Commit: %d}", st.Term, st.Commit)
   104  	ram.SetHardState(st) // MemoryStorage.SetHardState always returns nil
   105  
   106  	lg.Debugf("Appending %d entries to memory storage", len(ents))
   107  	ram.Append(ents) // MemoryStorage.Append always return nil
   108  
   109  	return &RaftStorage{
   110  		lg:            lg,
   111  		ram:           ram,
   112  		wal:           w,
   113  		snap:          sn,
   114  		walDir:        walDir,
   115  		snapDir:       snapDir,
   116  		snapshotIndex: ListSnapshots(lg, snapDir),
   117  	}, nil
   118  }
   119  
   120  // ListSnapshots returns a list of RaftIndex of snapshots stored on disk.
   121  // If a file is corrupted, rename the file.
   122  func ListSnapshots(logger *flogging.FabricLogger, snapDir string) []uint64 {
   123  	dir, err := os.Open(snapDir)
   124  	if err != nil {
   125  		logger.Errorf("Failed to open snapshot directory %s: %s", snapDir, err)
   126  		return nil
   127  	}
   128  	defer dir.Close()
   129  
   130  	filenames, err := dir.Readdirnames(-1)
   131  	if err != nil {
   132  		logger.Errorf("Failed to read snapshot files: %s", err)
   133  		return nil
   134  	}
   135  
   136  	snapfiles := []string{}
   137  	for i := range filenames {
   138  		if strings.HasSuffix(filenames[i], ".snap") {
   139  			snapfiles = append(snapfiles, filenames[i])
   140  		}
   141  	}
   142  	sort.Sort(sort.StringSlice(snapfiles))
   143  
   144  	var snapshots []uint64
   145  	for _, snapfile := range snapfiles {
   146  		fpath := filepath.Join(snapDir, snapfile)
   147  		s, err := snap.Read(logger.Zap(), fpath)
   148  		if err != nil {
   149  			logger.Errorf("Snapshot file %s is corrupted: %s", fpath, err)
   150  
   151  			broken := fpath + ".broken"
   152  			if err = os.Rename(fpath, broken); err != nil {
   153  				logger.Errorf("Failed to rename corrupted snapshot file %s to %s: %s", fpath, broken, err)
   154  			} else {
   155  				logger.Debugf("Renaming corrupted snapshot file %s to %s", fpath, broken)
   156  			}
   157  
   158  			continue
   159  		}
   160  
   161  		snapshots = append(snapshots, s.Metadata.Index)
   162  	}
   163  
   164  	return snapshots
   165  }
   166  
   167  func createSnapshotter(logger *flogging.FabricLogger, snapDir string) (*snap.Snapshotter, error) {
   168  	if err := os.MkdirAll(snapDir, os.ModePerm); err != nil {
   169  		return nil, errors.Errorf("failed to mkdir '%s' for snapshot: %s", snapDir, err)
   170  	}
   171  
   172  	return snap.New(logger.Zap(), snapDir), nil
   173  }
   174  
   175  func createOrReadWAL(lg *flogging.FabricLogger, walDir string, snapshot *raftpb.Snapshot) (w *wal.WAL, st raftpb.HardState, ents []raftpb.Entry, err error) {
   176  	if !wal.Exist(walDir) {
   177  		lg.Infof("No WAL data found, creating new WAL at path '%s'", walDir)
   178  		// TODO(jay_guo) add metadata to be persisted with wal once we need it.
   179  		// use case could be data dump and restore on a new node.
   180  		w, err := wal.Create(lg.Zap(), walDir, nil)
   181  		if err == os.ErrExist {
   182  			lg.Fatalf("programming error, we've just checked that WAL does not exist")
   183  		}
   184  
   185  		if err != nil {
   186  			return nil, st, nil, errors.Errorf("failed to initialize WAL: %s", err)
   187  		}
   188  
   189  		if err = w.Close(); err != nil {
   190  			return nil, st, nil, errors.Errorf("failed to close the WAL just created: %s", err)
   191  		}
   192  	} else {
   193  		lg.Infof("Found WAL data at path '%s', replaying it", walDir)
   194  	}
   195  
   196  	walsnap := walpb.Snapshot{}
   197  	if snapshot != nil {
   198  		walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
   199  	}
   200  
   201  	lg.Debugf("Loading WAL at Term %d and Index %d", walsnap.Term, walsnap.Index)
   202  
   203  	var repaired bool
   204  	for {
   205  		if w, err = wal.Open(lg.Zap(), walDir, walsnap); err != nil {
   206  			return nil, st, nil, errors.Errorf("failed to open WAL: %s", err)
   207  		}
   208  
   209  		if _, st, ents, err = w.ReadAll(); err != nil {
   210  			lg.Warnf("Failed to read WAL: %s", err)
   211  
   212  			if errc := w.Close(); errc != nil {
   213  				return nil, st, nil, errors.Errorf("failed to close erroneous WAL: %s", errc)
   214  			}
   215  
   216  			// only repair UnexpectedEOF and only repair once
   217  			if repaired || err != io.ErrUnexpectedEOF {
   218  				return nil, st, nil, errors.Errorf("failed to read WAL and cannot repair: %s", err)
   219  			}
   220  
   221  			if !wal.Repair(lg.Zap(), walDir) {
   222  				return nil, st, nil, errors.Errorf("failed to repair WAL: %s", err)
   223  			}
   224  
   225  			repaired = true
   226  			// next loop should be able to open WAL and return
   227  			continue
   228  		}
   229  
   230  		// successfully opened WAL and read all entries, break
   231  		break
   232  	}
   233  
   234  	return w, st, ents, nil
   235  }
   236  
   237  // Snapshot returns the latest snapshot stored in memory
   238  func (rs *RaftStorage) Snapshot() raftpb.Snapshot {
   239  	sn, _ := rs.ram.Snapshot() // Snapshot always returns nil error
   240  	return sn
   241  }
   242  
   243  // Store persists etcd/raft data
   244  func (rs *RaftStorage) Store(entries []raftpb.Entry, hardstate raftpb.HardState, snapshot raftpb.Snapshot) error {
   245  	if err := rs.wal.Save(hardstate, entries); err != nil {
   246  		return err
   247  	}
   248  
   249  	if !raft.IsEmptySnap(snapshot) {
   250  		if err := rs.saveSnap(snapshot); err != nil {
   251  			return err
   252  		}
   253  
   254  		if err := rs.ram.ApplySnapshot(snapshot); err != nil {
   255  			if err == raft.ErrSnapOutOfDate {
   256  				rs.lg.Warnf("Attempted to apply out-of-date snapshot at Term %d and Index %d",
   257  					snapshot.Metadata.Term, snapshot.Metadata.Index)
   258  			} else {
   259  				rs.lg.Fatalf("Unexpected programming error: %s", err)
   260  			}
   261  		}
   262  	}
   263  
   264  	if err := rs.ram.Append(entries); err != nil {
   265  		return err
   266  	}
   267  
   268  	return nil
   269  }
   270  
   271  func (rs *RaftStorage) saveSnap(snap raftpb.Snapshot) error {
   272  	rs.lg.Infof("Persisting snapshot (term: %d, index: %d) to WAL and disk", snap.Metadata.Term, snap.Metadata.Index)
   273  
   274  	// must save the snapshot index to the WAL before saving the
   275  	// snapshot to maintain the invariant that we only Open the
   276  	// wal at previously-saved snapshot indexes.
   277  	walsnap := walpb.Snapshot{
   278  		Index: snap.Metadata.Index,
   279  		Term:  snap.Metadata.Term,
   280  	}
   281  
   282  	if err := rs.wal.SaveSnapshot(walsnap); err != nil {
   283  		return errors.Errorf("failed to save snapshot to WAL: %s", err)
   284  	}
   285  
   286  	if err := rs.snap.SaveSnap(snap); err != nil {
   287  		return errors.Errorf("failed to save snapshot to disk: %s", err)
   288  	}
   289  
   290  	rs.lg.Debugf("Releasing lock to wal files prior to %d", snap.Metadata.Index)
   291  	if err := rs.wal.ReleaseLockTo(snap.Metadata.Index); err != nil {
   292  		return err
   293  	}
   294  
   295  	return nil
   296  }
   297  
   298  // TakeSnapshot takes a snapshot at index i from MemoryStorage, and persists it to wal and disk.
   299  func (rs *RaftStorage) TakeSnapshot(i uint64, cs raftpb.ConfState, data []byte) error {
   300  	rs.lg.Debugf("Creating snapshot at index %d from MemoryStorage", i)
   301  	snap, err := rs.ram.CreateSnapshot(i, &cs, data)
   302  	if err != nil {
   303  		return errors.Errorf("failed to create snapshot from MemoryStorage: %s", err)
   304  	}
   305  
   306  	if err = rs.saveSnap(snap); err != nil {
   307  		return err
   308  	}
   309  
   310  	rs.snapshotIndex = append(rs.snapshotIndex, snap.Metadata.Index)
   311  
   312  	// Keep some entries in memory for slow followers to catchup
   313  	if i > rs.SnapshotCatchUpEntries {
   314  		compacti := i - rs.SnapshotCatchUpEntries
   315  		rs.lg.Debugf("Purging in-memory raft entries prior to %d", compacti)
   316  		if err = rs.ram.Compact(compacti); err != nil {
   317  			if err == raft.ErrCompacted {
   318  				rs.lg.Warnf("Raft entries prior to %d are already purged", compacti)
   319  			} else {
   320  				rs.lg.Fatalf("Failed to purge raft entries: %s", err)
   321  			}
   322  		}
   323  	}
   324  
   325  	rs.lg.Infof("Snapshot is taken at index %d", i)
   326  
   327  	rs.gc()
   328  	return nil
   329  }
   330  
   331  // gc collects etcd/raft garbage files, namely wal and snapshot files
   332  func (rs *RaftStorage) gc() {
   333  	if len(rs.snapshotIndex) < MaxSnapshotFiles {
   334  		rs.lg.Debugf("Snapshots on disk (%d) < limit (%d), no need to purge wal/snapshot",
   335  			len(rs.snapshotIndex), MaxSnapshotFiles)
   336  		return
   337  	}
   338  
   339  	rs.snapshotIndex = rs.snapshotIndex[len(rs.snapshotIndex)-MaxSnapshotFiles:]
   340  
   341  	rs.purgeWAL()
   342  	rs.purgeSnap()
   343  }
   344  
   345  func (rs *RaftStorage) purgeWAL() {
   346  	retain := rs.snapshotIndex[0]
   347  
   348  	var files []string
   349  	err := filepath.Walk(rs.walDir, func(path string, info os.FileInfo, err error) error {
   350  		if err != nil {
   351  			return err
   352  		}
   353  		if !strings.HasSuffix(path, ".wal") {
   354  			return nil
   355  		}
   356  
   357  		var seq, index uint64
   358  		_, f := filepath.Split(path)
   359  		fmt.Sscanf(f, "%016x-%016x.wal", &seq, &index)
   360  
   361  		// Only purge WAL with index lower than oldest snapshot.
   362  		// filepath.SkipDir seizes Walk without returning error.
   363  		if index >= retain {
   364  			return filepath.SkipDir
   365  		}
   366  
   367  		files = append(files, path)
   368  		return nil
   369  	})
   370  	if err != nil {
   371  		rs.lg.Errorf("Failed to read WAL directory %s: %s", rs.walDir, err)
   372  	}
   373  
   374  	if len(files) <= 1 {
   375  		// we need to keep one wal segment with index smaller than snapshot.
   376  		// see comment on wal.ReleaseLockTo for the more details.
   377  		return
   378  	}
   379  
   380  	rs.purge(files[:len(files)-1])
   381  }
   382  
   383  func (rs *RaftStorage) purgeSnap() {
   384  	var files []string
   385  	err := filepath.Walk(rs.snapDir, func(path string, info os.FileInfo, err error) error {
   386  		if err != nil {
   387  			return err
   388  		}
   389  		if strings.HasSuffix(path, ".snap") {
   390  			files = append(files, path)
   391  		} else if strings.HasSuffix(path, ".broken") {
   392  			rs.lg.Warnf("Found broken snapshot file %s, it can be removed manually", path)
   393  		}
   394  
   395  		return nil
   396  	})
   397  	if err != nil {
   398  		rs.lg.Errorf("Failed to read Snapshot directory %s: %s", rs.snapDir, err)
   399  		return
   400  	}
   401  
   402  	l := len(files)
   403  	if l <= MaxSnapshotFiles {
   404  		return
   405  	}
   406  
   407  	rs.purge(files[:l-MaxSnapshotFiles]) // retain last MaxSnapshotFiles snapshot files
   408  }
   409  
   410  func (rs *RaftStorage) purge(files []string) {
   411  	for _, file := range files {
   412  		l, err := fileutil.TryLockFile(file, os.O_WRONLY, fileutil.PrivateFileMode)
   413  		if err != nil {
   414  			rs.lg.Debugf("Failed to lock %s, abort purging", file)
   415  			break
   416  		}
   417  
   418  		if err = os.Remove(file); err != nil {
   419  			rs.lg.Errorf("Failed to remove %s: %s", file, err)
   420  		} else {
   421  			rs.lg.Debugf("Purged file %s", file)
   422  		}
   423  
   424  		if err = l.Close(); err != nil {
   425  			rs.lg.Errorf("Failed to close file lock %s: %s", l.Name(), err)
   426  		}
   427  	}
   428  }
   429  
   430  // ApplySnapshot applies snapshot to local memory storage
   431  func (rs *RaftStorage) ApplySnapshot(snap raftpb.Snapshot) {
   432  	if err := rs.ram.ApplySnapshot(snap); err != nil {
   433  		if err == raft.ErrSnapOutOfDate {
   434  			rs.lg.Warnf("Attempted to apply out-of-date snapshot at Term %d and Index %d",
   435  				snap.Metadata.Term, snap.Metadata.Index)
   436  		} else {
   437  			rs.lg.Fatalf("Unexpected programming error: %s", err)
   438  		}
   439  	}
   440  }
   441  
   442  // Close closes storage
   443  func (rs *RaftStorage) Close() error {
   444  	if err := rs.wal.Close(); err != nil {
   445  		return err
   446  	}
   447  
   448  	return nil
   449  }