github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/state/raft/storage/storage.go (about)

     1  package storage
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"sync"
     9  
    10  	"github.com/coreos/etcd/pkg/fileutil"
    11  	"github.com/coreos/etcd/raft/raftpb"
    12  	"github.com/coreos/etcd/snap"
    13  	"github.com/coreos/etcd/wal"
    14  	"github.com/coreos/etcd/wal/walpb"
    15  	"github.com/docker/swarmkit/log"
    16  	"github.com/docker/swarmkit/manager/encryption"
    17  	"github.com/pkg/errors"
    18  )
    19  
// ErrNoWAL is returned if there are no WALs on disk: BootstrapFromDisk
// returns it when neither the current WAL directory nor any of the legacy
// WAL directories contains a WAL.
var ErrNoWAL = errors.New("no WAL present")
    22  
    23  type walSnapDirs struct {
    24  	wal  string
    25  	snap string
    26  }
    27  
// the wal/snap directories in decreasing order of preference/version.
// The first entry is the current layout (the one walDir and snapDir resolve
// to); the remaining entries are legacy layouts that BootstrapFromDisk probes,
// in this order, when the current directories do not exist yet.
var versionedWALSnapDirs = []walSnapDirs{
	{wal: "wal-v3-encrypted", snap: "snap-v3-encrypted"},
	{wal: "wal-v3", snap: "snap-v3"},
	{wal: "wal", snap: "snap"},
}
    34  
// EncryptedRaftLogger saves raft data to disk. The WAL and the snapshotter it
// manages are built from an encrypter/decrypter pair derived from
// EncryptionKey.
type EncryptedRaftLogger struct {
	// StateDir is the directory under which the versioned WAL and snapshot
	// directories live.
	StateDir      string
	// EncryptionKey is the key handed to encryption.Defaults to build the
	// encrypter and decrypter. RotateEncryptionKey replaces it.
	EncryptionKey []byte

	// FIPS specifies whether the encryption should be FIPS-compliant
	FIPS bool

	// encoderMu is locked for writing only when we need to replace the wal object and
	// snapshotter object, not when we're writing snapshots or wals (in which case it's
	// locked for reading)
	encoderMu   sync.RWMutex
	// wal is nil until the logger has been bootstrapped, and is reset to nil by Close.
	wal         WAL
	// snapshotter is set together with wal during bootstrap and cleared by Close and Clear.
	snapshotter Snapshotter
}
    49  
    50  // BootstrapFromDisk creates a new snapshotter and wal, and also reads the latest snapshot and WALs from disk
    51  func (e *EncryptedRaftLogger) BootstrapFromDisk(ctx context.Context, oldEncryptionKeys ...[]byte) (*raftpb.Snapshot, WALData, error) {
    52  	e.encoderMu.Lock()
    53  	defer e.encoderMu.Unlock()
    54  
    55  	walDir := e.walDir()
    56  	snapDir := e.snapDir()
    57  
    58  	encrypter, decrypter := encryption.Defaults(e.EncryptionKey, e.FIPS)
    59  	if oldEncryptionKeys != nil {
    60  		decrypters := []encryption.Decrypter{decrypter}
    61  		for _, key := range oldEncryptionKeys {
    62  			_, d := encryption.Defaults(key, e.FIPS)
    63  			decrypters = append(decrypters, d)
    64  		}
    65  		decrypter = encryption.NewMultiDecrypter(decrypters...)
    66  	}
    67  
    68  	snapFactory := NewSnapFactory(encrypter, decrypter)
    69  
    70  	if !fileutil.Exist(snapDir) {
    71  		// If snapshots created by the etcd-v2 code exist, or by swarmkit development version,
    72  		// read the latest snapshot and write it encoded to the new path.  The new path
    73  		// prevents etc-v2 creating snapshots that are visible to us, but not encoded and
    74  		// out of sync with our WALs, after a downgrade.
    75  		for _, dirs := range versionedWALSnapDirs[1:] {
    76  			legacySnapDir := filepath.Join(e.StateDir, dirs.snap)
    77  			if fileutil.Exist(legacySnapDir) {
    78  				if err := MigrateSnapshot(legacySnapDir, snapDir, OriginalSnap, snapFactory); err != nil {
    79  					return nil, WALData{}, err
    80  				}
    81  				break
    82  			}
    83  		}
    84  	}
    85  	// ensure the new directory exists
    86  	if err := os.MkdirAll(snapDir, 0700); err != nil {
    87  		return nil, WALData{}, errors.Wrap(err, "failed to create snapshot directory")
    88  	}
    89  
    90  	var (
    91  		snapshotter Snapshotter
    92  		walObj      WAL
    93  		err         error
    94  	)
    95  
    96  	// Create a snapshotter and load snapshot data
    97  	snapshotter = snapFactory.New(snapDir)
    98  	snapshot, err := snapshotter.Load()
    99  	if err != nil && err != snap.ErrNoSnapshot {
   100  		return nil, WALData{}, err
   101  	}
   102  
   103  	walFactory := NewWALFactory(encrypter, decrypter)
   104  	var walsnap walpb.Snapshot
   105  	if snapshot != nil {
   106  		walsnap.Index = snapshot.Metadata.Index
   107  		walsnap.Term = snapshot.Metadata.Term
   108  	}
   109  
   110  	if !wal.Exist(walDir) {
   111  		var walExists bool
   112  		// If wals created by the etcd-v2 wal code exist, read the latest ones based
   113  		// on this snapshot and encode them to wals in the new path to avoid adding
   114  		// backwards-incompatible entries to those files.
   115  		for _, dirs := range versionedWALSnapDirs[1:] {
   116  			legacyWALDir := filepath.Join(e.StateDir, dirs.wal)
   117  			if !wal.Exist(legacyWALDir) {
   118  				continue
   119  			}
   120  			if err = MigrateWALs(ctx, legacyWALDir, walDir, OriginalWAL, walFactory, walsnap); err != nil {
   121  				return nil, WALData{}, err
   122  			}
   123  			walExists = true
   124  			break
   125  		}
   126  		if !walExists {
   127  			return nil, WALData{}, ErrNoWAL
   128  		}
   129  	}
   130  
   131  	walObj, waldata, err := ReadRepairWAL(ctx, walDir, walsnap, walFactory)
   132  	if err != nil {
   133  		return nil, WALData{}, err
   134  	}
   135  
   136  	e.snapshotter = snapshotter
   137  	e.wal = walObj
   138  
   139  	return snapshot, waldata, nil
   140  }
   141  
   142  // BootstrapNew creates a new snapshotter and WAL writer, expecting that there is nothing on disk
   143  func (e *EncryptedRaftLogger) BootstrapNew(metadata []byte) error {
   144  	e.encoderMu.Lock()
   145  	defer e.encoderMu.Unlock()
   146  	encrypter, decrypter := encryption.Defaults(e.EncryptionKey, e.FIPS)
   147  	walFactory := NewWALFactory(encrypter, decrypter)
   148  
   149  	for _, dirpath := range []string{filepath.Dir(e.walDir()), e.snapDir()} {
   150  		if err := os.MkdirAll(dirpath, 0700); err != nil {
   151  			return errors.Wrapf(err, "failed to create %s", dirpath)
   152  		}
   153  	}
   154  	var err error
   155  	// the wal directory must not already exist upon creation
   156  	e.wal, err = walFactory.Create(e.walDir(), metadata)
   157  	if err != nil {
   158  		return errors.Wrap(err, "failed to create WAL")
   159  	}
   160  
   161  	e.snapshotter = NewSnapFactory(encrypter, decrypter).New(e.snapDir())
   162  	return nil
   163  }
   164  
   165  func (e *EncryptedRaftLogger) walDir() string {
   166  	return filepath.Join(e.StateDir, versionedWALSnapDirs[0].wal)
   167  }
   168  
   169  func (e *EncryptedRaftLogger) snapDir() string {
   170  	return filepath.Join(e.StateDir, versionedWALSnapDirs[0].snap)
   171  }
   172  
   173  // RotateEncryptionKey swaps out the encoders and decoders used by the wal and snapshotter
   174  func (e *EncryptedRaftLogger) RotateEncryptionKey(newKey []byte) {
   175  	e.encoderMu.Lock()
   176  	defer e.encoderMu.Unlock()
   177  
   178  	if e.wal != nil { // if the wal exists, the snapshotter exists
   179  		// We don't want to have to close the WAL, because we can't open a new one.
   180  		// We need to know the previous snapshot, because when you open a WAL you
   181  		// have to read out all the entries from a particular snapshot, or you can't
   182  		// write.  So just rotate the encoders out from under it.  We already
   183  		// have a lock on writing to snapshots and WALs.
   184  		wrapped, ok := e.wal.(*wrappedWAL)
   185  		if !ok {
   186  			panic(fmt.Errorf("EncryptedRaftLogger's WAL is not a wrappedWAL"))
   187  		}
   188  
   189  		wrapped.encrypter, wrapped.decrypter = encryption.Defaults(newKey, e.FIPS)
   190  
   191  		e.snapshotter = NewSnapFactory(wrapped.encrypter, wrapped.decrypter).New(e.snapDir())
   192  	}
   193  	e.EncryptionKey = newKey
   194  }
   195  
   196  // SaveSnapshot actually saves a given snapshot to both the WAL and the snapshot.
   197  func (e *EncryptedRaftLogger) SaveSnapshot(snapshot raftpb.Snapshot) error {
   198  
   199  	walsnap := walpb.Snapshot{
   200  		Index: snapshot.Metadata.Index,
   201  		Term:  snapshot.Metadata.Term,
   202  	}
   203  
   204  	e.encoderMu.RLock()
   205  	if err := e.wal.SaveSnapshot(walsnap); err != nil {
   206  		e.encoderMu.RUnlock()
   207  		return err
   208  	}
   209  
   210  	snapshotter := e.snapshotter
   211  	e.encoderMu.RUnlock()
   212  
   213  	if err := snapshotter.SaveSnap(snapshot); err != nil {
   214  		return err
   215  	}
   216  	return e.wal.ReleaseLockTo(snapshot.Metadata.Index)
   217  }
   218  
   219  // GC garbage collects snapshots and wals older than the provided index and term
   220  func (e *EncryptedRaftLogger) GC(index uint64, term uint64, keepOldSnapshots uint64) error {
   221  	// Delete any older snapshots
   222  	curSnapshot := fmt.Sprintf("%016x-%016x%s", term, index, ".snap")
   223  
   224  	snapshots, err := ListSnapshots(e.snapDir())
   225  	if err != nil {
   226  		return err
   227  	}
   228  
   229  	// Ignore any snapshots that are older than the current snapshot.
   230  	// Delete the others. Rather than doing lexical comparisons, we look
   231  	// at what exists before/after the current snapshot in the slice.
   232  	// This means that if the current snapshot doesn't appear in the
   233  	// directory for some strange reason, we won't delete anything, which
   234  	// is the safe behavior.
   235  	curSnapshotIdx := -1
   236  	var (
   237  		removeErr      error
   238  		oldestSnapshot string
   239  	)
   240  
   241  	for i, snapFile := range snapshots {
   242  		if curSnapshotIdx >= 0 && i > curSnapshotIdx {
   243  			if uint64(i-curSnapshotIdx) > keepOldSnapshots {
   244  				err := os.Remove(filepath.Join(e.snapDir(), snapFile))
   245  				if err != nil && removeErr == nil {
   246  					removeErr = err
   247  				}
   248  				continue
   249  			}
   250  		} else if snapFile == curSnapshot {
   251  			curSnapshotIdx = i
   252  		}
   253  		oldestSnapshot = snapFile
   254  	}
   255  
   256  	if removeErr != nil {
   257  		return removeErr
   258  	}
   259  
   260  	// Remove any WAL files that only contain data from before the oldest
   261  	// remaining snapshot.
   262  
   263  	if oldestSnapshot == "" {
   264  		return nil
   265  	}
   266  
   267  	// Parse index out of oldest snapshot's filename
   268  	var snapTerm, snapIndex uint64
   269  	_, err = fmt.Sscanf(oldestSnapshot, "%016x-%016x.snap", &snapTerm, &snapIndex)
   270  	if err != nil {
   271  		return errors.Wrapf(err, "malformed snapshot filename %s", oldestSnapshot)
   272  	}
   273  
   274  	wals, err := ListWALs(e.walDir())
   275  	if err != nil {
   276  		return err
   277  	}
   278  
   279  	found := false
   280  	deleteUntil := -1
   281  
   282  	for i, walName := range wals {
   283  		var walSeq, walIndex uint64
   284  		_, err = fmt.Sscanf(walName, "%016x-%016x.wal", &walSeq, &walIndex)
   285  		if err != nil {
   286  			return errors.Wrapf(err, "could not parse WAL name %s", walName)
   287  		}
   288  
   289  		if walIndex >= snapIndex {
   290  			deleteUntil = i - 1
   291  			found = true
   292  			break
   293  		}
   294  	}
   295  
   296  	// If all WAL files started with indices below the oldest snapshot's
   297  	// index, we can delete all but the newest WAL file.
   298  	if !found && len(wals) != 0 {
   299  		deleteUntil = len(wals) - 1
   300  	}
   301  
   302  	for i := 0; i < deleteUntil; i++ {
   303  		walPath := filepath.Join(e.walDir(), wals[i])
   304  		l, err := fileutil.TryLockFile(walPath, os.O_WRONLY, fileutil.PrivateFileMode)
   305  		if err != nil {
   306  			return errors.Wrapf(err, "could not lock old WAL file %s for removal", wals[i])
   307  		}
   308  		err = os.Remove(walPath)
   309  		l.Close()
   310  		if err != nil {
   311  			return errors.Wrapf(err, "error removing old WAL file %s", wals[i])
   312  		}
   313  	}
   314  
   315  	return nil
   316  }
   317  
   318  // SaveEntries saves only entries to disk
   319  func (e *EncryptedRaftLogger) SaveEntries(st raftpb.HardState, entries []raftpb.Entry) error {
   320  	e.encoderMu.RLock()
   321  	defer e.encoderMu.RUnlock()
   322  
   323  	if e.wal == nil {
   324  		return fmt.Errorf("raft WAL has either been closed or has never been created")
   325  	}
   326  	return e.wal.Save(st, entries)
   327  }
   328  
   329  // Close closes the logger - it will have to be bootstrapped again to start writing
   330  func (e *EncryptedRaftLogger) Close(ctx context.Context) {
   331  	e.encoderMu.Lock()
   332  	defer e.encoderMu.Unlock()
   333  
   334  	if e.wal != nil {
   335  		if err := e.wal.Close(); err != nil {
   336  			log.G(ctx).WithError(err).Error("error closing raft WAL")
   337  		}
   338  	}
   339  
   340  	e.wal = nil
   341  	e.snapshotter = nil
   342  }
   343  
   344  // Clear closes the existing WAL and removes the WAL and snapshot.
   345  func (e *EncryptedRaftLogger) Clear(ctx context.Context) error {
   346  	e.encoderMu.Lock()
   347  	defer e.encoderMu.Unlock()
   348  
   349  	if e.wal != nil {
   350  		if err := e.wal.Close(); err != nil {
   351  			log.G(ctx).WithError(err).Error("error closing raft WAL")
   352  		}
   353  	}
   354  	e.snapshotter = nil
   355  
   356  	os.RemoveAll(e.walDir())
   357  	os.RemoveAll(e.snapDir())
   358  	return nil
   359  }